diff --git a/Cargo.lock b/Cargo.lock
index a64e007090..9c6883e8aa 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -14001,6 +14001,27 @@ version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"

+[[package]]
+name = "tqdb"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "camino",
+ "clap",
+ "colored",
+ "daft",
+ "iddqd",
+ "omicron-repl-utils",
+ "omicron-workspace-hack",
+ "reconfigurator-cli",
+ "reedline",
+ "serde_json",
+ "slog",
+ "tabled 0.15.0",
+ "trust-quorum",
+ "trust-quorum-test-utils",
+]
+
 [[package]]
 name = "tracing"
 version = "0.1.40"
@@ -14165,6 +14186,7 @@ dependencies = [
 name = "trust-quorum"
 version = "0.1.0"
 dependencies = [
+ "anyhow",
  "assert_matches",
  "bcs",
  "bootstore",
@@ -14172,6 +14194,7 @@ dependencies = [
  "chacha20poly1305",
  "daft",
  "derive_more 0.99.20",
+ "dropshot",
  "gfss",
  "hex",
  "hkdf",
@@ -14183,6 +14206,7 @@ dependencies = [
  "rand 0.9.2",
  "secrecy 0.10.3",
  "serde",
+ "serde_json",
  "serde_with",
  "sha3",
  "slog",
@@ -14192,10 +14216,28 @@ dependencies = [
  "test-strategy",
  "thiserror 2.0.12",
  "tokio",
+ "trust-quorum-test-utils",
  "uuid",
  "zeroize",
 ]

+[[package]]
+name = "trust-quorum-test-utils"
+version = "0.1.0"
+dependencies = [
+ "camino",
+ "daft",
+ "dropshot",
+ "gfss",
+ "iddqd",
+ "omicron-uuid-kinds",
+ "omicron-workspace-hack",
+ "serde",
+ "serde_json",
+ "slog",
+ "trust-quorum",
+]
+
 [[package]]
 name = "try-lock"
 version = "0.2.5"
diff --git a/Cargo.toml b/Cargo.toml
index e526f23b11..0af37939fb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -141,6 +141,8 @@ members = [
     "test-utils",
     "trust-quorum",
     "trust-quorum/gfss",
+    "trust-quorum/test-utils",
+    "trust-quorum/tqdb",
     "typed-rng",
     "update-common",
     "update-engine",
@@ -298,6 +300,8 @@ default-members = [
     "sp-sim",
     "trust-quorum",
     "trust-quorum/gfss",
+    "trust-quorum/test-utils",
+    "trust-quorum/tqdb",
     "test-utils",
     "typed-rng",
     "update-common",
@@ -460,6 +464,8 @@ gateway-test-utils = { path = "gateway-test-utils" }
 gateway-types = { path = "gateway-types" }
 gethostname = "0.5.0"
 gfss = { path = "trust-quorum/gfss" }
+trust-quorum = { path = "trust-quorum" }
+trust-quorum-test-utils = { path = "trust-quorum/test-utils" }
 glob = "0.3.2"
 guppy = "0.17.20"
 headers = "0.4.1"
diff --git a/dev-tools/reconfigurator-cli/src/lib.rs b/dev-tools/reconfigurator-cli/src/lib.rs
index 43be0311ae..4156b979b5 100644
--- a/dev-tools/reconfigurator-cli/src/lib.rs
+++ b/dev-tools/reconfigurator-cli/src/lib.rs
@@ -14,7 +14,7 @@ use iddqd::IdOrdMap;
 use indent_write::fmt::IndentWriter;
 use internal_dns_types::diff::DnsDiff;
 use itertools::Itertools;
-use log_capture::LogCapture;
+pub use log_capture::LogCapture;
 use nexus_inventory::CollectionBuilder;
 use nexus_reconfigurator_blippy::Blippy;
 use nexus_reconfigurator_blippy::BlippyReportSortKey;
diff --git a/dev-tools/repl-utils/src/lib.rs b/dev-tools/repl-utils/src/lib.rs
index 3a4a0c5547..f14f7a606e 100644
--- a/dev-tools/repl-utils/src/lib.rs
+++ b/dev-tools/repl-utils/src/lib.rs
@@ -9,6 +9,7 @@ use anyhow::anyhow;
 use anyhow::bail;
 use camino::Utf8Path;
 use clap::Parser;
+use reedline::Prompt;
 use reedline::Reedline;
 use reedline::Signal;
 use std::fs::File;
@@ -110,13 +111,24 @@ pub fn run_repl_from_file<C: Parser>(
 pub fn run_repl_on_stdin<C: Parser>(
     run_one: &mut dyn FnMut(C) -> anyhow::Result<Option<String>>,
 ) -> anyhow::Result<()> {
-    let mut ed = Reedline::create();
+    let ed = Reedline::create();
     let prompt = reedline::DefaultPrompt::new(
         reedline::DefaultPromptSegment::Empty,
         reedline::DefaultPromptSegment::Empty,
     );
+    run_repl_on_stdin_customized(ed, &prompt, run_one)
+}
+
+/// Runs a REPL using stdin/stdout with a customized `Reedline` and `Prompt`
+///
+/// See docs for [`run_repl_on_stdin`]
+pub fn run_repl_on_stdin_customized<C: Parser>(
+    mut ed: Reedline,
+    prompt: &dyn Prompt,
+    run_one: &mut dyn FnMut(C) -> anyhow::Result<Option<String>>,
+) -> anyhow::Result<()> {
     loop {
-        match ed.read_line(&prompt) {
+        match ed.read_line(prompt) {
             Ok(Signal::Success(buffer)) => {
                 // Strip everything after '#' as a comment.
                 let entry = match buffer.split_once('#') {
diff --git a/trust-quorum/Cargo.toml b/trust-quorum/Cargo.toml
index eaf141ddf2..0d6ac6863c 100644
--- a/trust-quorum/Cargo.toml
+++ b/trust-quorum/Cargo.toml
@@ -8,6 +8,7 @@ license = "MPL-2.0"
 workspace = true

 [dependencies]
+anyhow.workspace = true
 bcs.workspace = true
 bootstore.workspace = true
 camino.workspace = true
@@ -36,6 +37,18 @@ omicron-workspace-hack.workspace = true

 [dev-dependencies]
 assert_matches.workspace = true
+dropshot.workspace = true
 omicron-test-utils.workspace = true
 proptest.workspace = true
+serde_json.workspace = true
 test-strategy.workspace = true
+trust-quorum-test-utils.workspace = true
+
+[features]
+# Impl `PartialEq` and `Eq` for types implementing `subtle::ConstantTimeEq` when
+# this feature is enabled.
+#
+# This is of unknown risk. The Rust compiler may obviate the security of using
+# subtle when we do this. On the other hand, it's very useful for testing and
+# debugging outside of production.
+danger_partial_eq_ct_wrapper = ["gfss/danger_partial_eq_ct_wrapper"]
diff --git a/trust-quorum/gfss/Cargo.toml b/trust-quorum/gfss/Cargo.toml
index 5802654f80..3b6ad9fdf1 100644
--- a/trust-quorum/gfss/Cargo.toml
+++ b/trust-quorum/gfss/Cargo.toml
@@ -21,3 +21,14 @@ omicron-workspace-hack.workspace = true
 [dev-dependencies]
 proptest.workspace = true
 test-strategy.workspace = true
+
+[features]
+# Impl `PartialEq` and `Eq` for types implementing `subtle::ConstantTimeEq` when
+# this feature is enabled.
+#
+# This is of unknown risk. The Rust compiler may obviate the security of using
+# subtle when we do this. On the other hand, it's very useful for testing and
+# debugging outside of production.
+danger_partial_eq_ct_wrapper = []
diff --git a/trust-quorum/gfss/src/gf256.rs b/trust-quorum/gfss/src/gf256.rs
index 235cf37265..78fc2bc1f2 100644
--- a/trust-quorum/gfss/src/gf256.rs
+++ b/trust-quorum/gfss/src/gf256.rs
@@ -32,7 +32,7 @@ use zeroize::Zeroize;

 /// An element in a finite field of prime power 2^8
 ///
-/// We explicitly don't enable the equality operators to prevent ourselves from
+/// We explicitly don't derive the equality operators to prevent ourselves from
 /// accidentally using those instead of the constant time ones.
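+/// (Equality checks go through `subtle::ConstantTimeEq`, e.g.
+/// `a.ct_eq(&b)` returning a `subtle::Choice`; the feature-gated `PartialEq`
+/// below is a test-only convenience wrapper around it.)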
 #[repr(transparent)]
 #[derive(Debug, Clone, Copy, Zeroize, Serialize, Deserialize)]
@@ -120,6 +120,15 @@ impl ConstantTimeEq for Gf256 {
     }
 }

+#[cfg(feature = "danger_partial_eq_ct_wrapper")]
+impl PartialEq for Gf256 {
+    fn eq(&self, other: &Self) -> bool {
+        self.ct_eq(&other).into()
+    }
+}
+#[cfg(feature = "danger_partial_eq_ct_wrapper")]
+impl Eq for Gf256 {}
+
 impl Add for Gf256 {
     type Output = Self;

diff --git a/trust-quorum/gfss/src/shamir.rs b/trust-quorum/gfss/src/shamir.rs
index 2da11b83ba..49ea0a90a4 100644
--- a/trust-quorum/gfss/src/shamir.rs
+++ b/trust-quorum/gfss/src/shamir.rs
@@ -137,6 +137,16 @@ impl Share {
     }
 }

+#[cfg(feature = "danger_partial_eq_ct_wrapper")]
+impl PartialEq for Share {
+    fn eq(&self, other: &Self) -> bool {
+        self.x_coordinate == other.x_coordinate
+            && self.y_coordinates == other.y_coordinates
+    }
+}
+#[cfg(feature = "danger_partial_eq_ct_wrapper")]
+impl Eq for Share {}
+
 impl std::fmt::Debug for Share {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("KeyShareGf256").finish()
diff --git a/trust-quorum/src/compute_key_share.rs b/trust-quorum/src/compute_key_share.rs
index 8cc780f752..648519733f 100644
--- a/trust-quorum/src/compute_key_share.rs
+++ b/trust-quorum/src/compute_key_share.rs
@@ -8,17 +8,17 @@
 //! share for that configuration it must collect a threshold of key shares from
 //! other nodes so that it can compute its own key share.

-use crate::crypto::Sha3_256Digest;
 use crate::{
     Alarm, Configuration, Epoch, NodeHandlerCtx, PeerMsgKind, PlatformId,
 };
 use gfss::gf256::Gf256;
 use gfss::shamir::{self, Share};
-use slog::{Logger, error, o, warn};
+use slog::{Logger, error, o};
 use std::collections::BTreeMap;

 /// In memory state that tracks retrieval of key shares in order to compute
 /// this node's key share for a given configuration.
+#[derive(Debug, Clone)]
 pub struct KeyShareComputer {
     log: Logger,

@@ -28,6 +28,17 @@ pub struct KeyShareComputer {
     collected_shares: BTreeMap<PlatformId, Share>,
 }

+#[cfg(feature = "danger_partial_eq_ct_wrapper")]
+impl PartialEq for KeyShareComputer {
+    fn eq(&self, other: &Self) -> bool {
+        self.config == other.config
+            && self.collected_shares == other.collected_shares
+    }
+}
+
+#[cfg(feature = "danger_partial_eq_ct_wrapper")]
+impl Eq for KeyShareComputer {}
+
 impl KeyShareComputer {
     pub fn new(
         log: &Logger,
@@ -54,7 +65,9 @@ impl KeyShareComputer {
         ctx: &mut impl NodeHandlerCtx,
         peer: PlatformId,
     ) {
-        if !self.collected_shares.contains_key(&peer) {
+        if self.config.members.contains_key(&peer)
+            && !self.collected_shares.contains_key(&peer)
+        {
             ctx.send(peer, PeerMsgKind::GetShare(self.config.epoch));
         }
     }
@@ -70,55 +83,29 @@ impl KeyShareComputer {
         epoch: Epoch,
         share: Share,
     ) -> bool {
-        // Are we trying to retrieve shares for `epoch`?
-        if epoch != self.config.epoch {
-            warn!(
-                self.log,
-                "Received Share from node with wrong epoch";
-                "received_epoch" => %epoch,
-                "from" => %from
-            );
-            return false;
-        }
-
-        // Is the sender a member of the configuration `epoch`?
-        // Was the sender a member of the configuration at `old_epoch`?
-        let Some(expected_digest) = self.config.members.get(&from) else {
-            warn!(
-                self.log,
-                "Received Share from unexpected node";
-                "epoch" => %epoch,
-                "from" => %from
-            );
+        if !crate::validate_share(&self.log, &self.config, &from, epoch, &share)
+        {
+            // Logging done inside `validate_share`
             return false;
         };

-        // Does the share hash match what we expect?
-        let mut digest = Sha3_256Digest::default();
-        share.digest::<sha3::Sha3_256>(&mut digest.0);
-        if digest != *expected_digest {
-            error!(
-                self.log,
-                "Received share with invalid digest";
-                "epoch" => %epoch,
-                "from" => %from
-            );
-            return false;
-        }
-
         // A valid share was received. Is it new?
         if self.collected_shares.insert(from, share).is_some() {
             return false;
         }

-        // Do we have enough shares to computer our rack share?
+        // Do we have enough shares to compute our rack share?
         if self.collected_shares.len() < self.config.threshold.0 as usize {
             return false;
         }

+        // Share indices are assigned according to the configuration
+        // membership's key order, when the configuration is constructed.
+        //
         // What index are we in the configuration? This is our "x-coordinate"
         // for our key share calculation. We always start indexing from 1, since
         // 0 is the rack secret.
+        //
         let index =
             self.config.members.keys().position(|id| id == ctx.platform_id());
diff --git a/trust-quorum/src/configuration.rs b/trust-quorum/src/configuration.rs
index a6057c62ed..8b116e6f4a 100644
--- a/trust-quorum/src/configuration.rs
+++ b/trust-quorum/src/configuration.rs
@@ -7,6 +7,7 @@
 use crate::crypto::{EncryptedRackSecrets, RackSecret, Sha3_256Digest};
 use crate::validators::ValidatedReconfigureMsg;
 use crate::{Epoch, PlatformId, Threshold};
+use daft::Diffable;
 use gfss::shamir::{Share, SplitError};
 use iddqd::{IdOrdItem, id_upcast};
 use omicron_uuid_kinds::RackUuid;
@@ -31,7 +32,15 @@ pub enum ConfigurationError {
 ///
 /// Only valid for non-lrtq configurations
 #[derive(
-    Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize,
+    Debug,
+    Clone,
+    PartialEq,
+    Eq,
+    PartialOrd,
+    Ord,
+    Serialize,
+    Deserialize,
+    Diffable,
 )]
 pub struct Configuration {
     /// Unique Id of the rack
diff --git a/trust-quorum/src/coordinator_state.rs b/trust-quorum/src/coordinator_state.rs
index 78e8c8b125..50cba4d839 100644
--- a/trust-quorum/src/coordinator_state.rs
+++ b/trust-quorum/src/coordinator_state.rs
@@ -4,12 +4,12 @@

 //! State of a reconfiguration coordinator inside a [`crate::Node`]

-use crate::NodeHandlerCtx;
-use crate::crypto::{
-    LrtqShare, PlaintextRackSecrets, Sha3_256Digest, ShareDigestLrtq,
-};
+use crate::configuration::ConfigurationDiff;
+use crate::crypto::{LrtqShare, PlaintextRackSecrets, ShareDigestLrtq};
 use crate::validators::{ReconfigurationError, ValidatedReconfigureMsg};
 use crate::{Configuration, Epoch, PeerMsgKind, PlatformId, RackSecret};
+use crate::{NodeHandlerCtx, ValidatedReconfigureMsgDiff};
+use daft::{Diffable, Leaf};
 use gfss::shamir::Share;
 use slog::{Logger, error, info, o, warn};
 use std::collections::{BTreeMap, BTreeSet};
@@ -27,7 +27,9 @@ use std::mem;
 /// allows progress to always be made with a full linearization of epochs.
 ///
 /// We allow some unused fields before we complete the coordination code
+#[derive(Clone, Debug, Diffable)]
 pub struct CoordinatorState {
+    #[daft(ignore)]
     log: Logger,

     /// A copy of the message used to start this reconfiguration
@@ -41,6 +43,34 @@ pub struct CoordinatorState {
     op: CoordinatorOperation,
 }

+// For diffs we want to allow access to all fields, but not make them public in
+// the `CoordinatorState` type itself.
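+//
+// (`derive(Diffable)` generates the `CoordinatorStateDiff` type with one diff
+// field per non-ignored field; the methods below re-expose those fields
+// read-only.)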
+impl<'daft> CoordinatorStateDiff<'daft> {
+    pub fn reconfigure_msg(&self) -> &ValidatedReconfigureMsgDiff<'daft> {
+        &self.reconfigure_msg
+    }
+
+    pub fn configuration(&self) -> &ConfigurationDiff<'daft> {
+        &self.configuration
+    }
+
+    pub fn op(&self) -> Leaf<&CoordinatorOperation> {
+        self.op
+    }
+}
+
+#[cfg(feature = "danger_partial_eq_ct_wrapper")]
+impl PartialEq for CoordinatorState {
+    fn eq(&self, other: &Self) -> bool {
+        self.reconfigure_msg == other.reconfigure_msg
+            && self.configuration == other.configuration
+            && self.op == other.op
+    }
+}
+
+#[cfg(feature = "danger_partial_eq_ct_wrapper")]
+impl Eq for CoordinatorState {}
+
 impl CoordinatorState {
     /// Start coordinating a reconfiguration for a brand new trust quorum
     ///
@@ -179,13 +209,14 @@ impl CoordinatorState {
             #[expect(unused)]
             CoordinatorOperation::CollectLrtqShares { members, shares } => {}
             CoordinatorOperation::Prepare { prepares, .. } => {
-                for (platform_id, (config, share)) in
-                    prepares.clone().into_iter()
-                {
+                for (platform_id, (config, share)) in prepares.iter() {
                     if ctx.connected().contains(&platform_id) {
                         ctx.send(
-                            platform_id,
-                            PeerMsgKind::Prepare { config, share },
+                            platform_id.clone(),
+                            PeerMsgKind::Prepare {
+                                config: config.clone(),
+                                share: share.clone(),
+                            },
                         );
                     }
                 }
@@ -209,7 +240,6 @@ impl CoordinatorState {
             } => {}
             CoordinatorOperation::CollectLrtqShares { members, shares } => {}
             CoordinatorOperation::Prepare { prepares, prepare_acks } => {
-                let rack_id = self.reconfigure_msg.rack_id();
                 if let Some((config, share)) = prepares.get(&to) {
                     ctx.send(
                         to,
@@ -285,39 +315,15 @@ impl CoordinatorState {
                     "new_epoch" => new_epoch.to_string()
                 ));

-                // Are we trying to retrieve shares for `epoch`?
-                if *old_epoch != epoch {
-                    warn!(
-                        log,
-                        "Received Share from node with wrong epoch";
-                        "received_epoch" => %epoch,
-                        "from" => %from
-                    );
-                    return;
-                }
-
-                // Was the sender a member of the configuration at `old_epoch`?
-                let Some(expected_digest) = old_config.members.get(&from)
-                else {
-                    warn!(
-                        log,
-                        "Received Share from unexpected node";
-                        "received_epoch" => %epoch,
-                        "from" => %from
-                    );
+                if !crate::validate_share(
+                    &self.log,
+                    &old_config,
+                    &from,
+                    epoch,
+                    &share,
+                ) {
+                    // Logging done inside `validate_share`
                     return;
-                };
-
-                // Does the share hash match what we expect?
-                let mut digest = Sha3_256Digest::default();
-                share.digest::<sha3::Sha3_256>(&mut digest.0);
-                if digest != *expected_digest {
-                    error!(
-                        log,
-                        "Received share with invalid digest";
-                        "received_epoch" => %epoch,
-                        "from" => %from
-                    );
                 }

                 // A valid share was received. Is it new?
@@ -411,6 +417,12 @@ impl CoordinatorState {
         };

         // Save the encrypted rack secrets in the current configuration
+        //
+        // A new configuration is always created with a `None` value
+        // for `encrypted_rack_secrets`, as it gets filled in here.
+        //
+        // If we change that it's a programmer error that will be caught
+        // immediately by our tests.
         assert!(self.configuration.encrypted_rack_secrets.is_none());
         self.configuration.encrypted_rack_secrets =
             Some(new_encrypted_rack_secrets);
@@ -467,6 +479,8 @@ impl CoordinatorState {
     }

 /// What should the coordinator be doing?
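+///
+/// (Roughly: collect key shares for the last committed configuration, or LRTQ
+/// shares when upgrading from LRTQ, then send `Prepare` messages for the new
+/// configuration and record acks.)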
+#[derive(Clone, Debug, Diffable)]
+#[cfg_attr(feature = "danger_partial_eq_ct_wrapper", derive(PartialEq, Eq))]
 pub enum CoordinatorOperation {
     CollectShares {
         old_epoch: Epoch,
diff --git a/trust-quorum/src/crypto.rs b/trust-quorum/src/crypto.rs
index 69d33c6cd6..cdb9967733 100644
--- a/trust-quorum/src/crypto.rs
+++ b/trust-quorum/src/crypto.rs
@@ -45,6 +45,7 @@ const CHACHA20POLY1305_NONCE_LEN: usize = 12;

 // The key share format used for LRTQ
 #[derive(Clone, Serialize, Deserialize, Zeroize, ZeroizeOnDrop, From)]
+#[cfg_attr(feature = "danger_partial_eq_ct_wrapper", derive(PartialEq, Eq))]
 pub struct LrtqShare(Vec<u8>);

 // We don't want to risk debug-logging the actual share contents, so implement
@@ -74,18 +75,20 @@ impl LrtqShare {
 pub struct ShareDigestLrtq(Sha3_256Digest);

 #[derive(
-    Default,
-    Debug,
-    Clone,
-    PartialEq,
-    Eq,
-    PartialOrd,
-    Ord,
-    Serialize,
-    Deserialize,
+    Default, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize,
 )]
 pub struct Sha3_256Digest(pub [u8; 32]);

+impl std::fmt::Debug for Sha3_256Digest {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "sha3 digest: ")?;
+        for v in self.0.as_slice() {
+            write!(f, "{:02x}", v)?;
+        }
+        Ok(())
+    }
+}
+
 /// A boxed array containing rack secret data
 ///
 /// We explicitly choose to box the data so that it is not littered around
diff --git a/trust-quorum/src/lib.rs b/trust-quorum/src/lib.rs
index 8bb8d8de5d..aed8a518b9 100644
--- a/trust-quorum/src/lib.rs
+++ b/trust-quorum/src/lib.rs
@@ -9,8 +9,12 @@
 //! All persistent state and all networking is managed outside of this
 //! implementation.

+use crypto::Sha3_256Digest;
+use daft::Diffable;
 use derive_more::Display;
+use gfss::shamir::Share;
 use serde::{Deserialize, Serialize};
+use slog::{Logger, error, warn};

 mod compute_key_share;
 mod configuration;
@@ -22,16 +26,19 @@ mod node_ctx;
 mod persistent_state;
 mod validators;
 pub use configuration::Configuration;
-pub use coordinator_state::{CoordinatorOperation, CoordinatorState};
+pub use coordinator_state::{
+    CoordinatorOperation, CoordinatorState, CoordinatorStateDiff,
+};
+pub use validators::ValidatedReconfigureMsgDiff;

 mod alarm;
 pub use alarm::Alarm;
 pub use crypto::RackSecret;
 pub use messages::*;
-pub use node::Node;
+pub use node::{Node, NodeDiff};
 // public only for docs.
 pub use node_ctx::NodeHandlerCtx;
-pub use node_ctx::{NodeCallerCtx, NodeCommonCtx, NodeCtx};
+pub use node_ctx::{NodeCallerCtx, NodeCommonCtx, NodeCtx, NodeCtxDiff};
 pub use persistent_state::{PersistentState, PersistentStateSummary};

 #[derive(
@@ -46,7 +53,9 @@ pub use persistent_state::{PersistentState, PersistentStateSummary};
     Serialize,
     Deserialize,
     Display,
+    Diffable,
 )]
+#[daft(leaf)]
 pub struct Epoch(pub u64);

 impl Epoch {
@@ -69,7 +78,9 @@ impl Epoch {
     Serialize,
     Deserialize,
     Display,
+    Diffable,
 )]
+#[daft(leaf)]
 pub struct Threshold(pub u8);

 /// A unique identifier for a given trust quorum member.
 ///
 ///
 ///
 /// See RFDs 303 and 308 for more details.
 #[derive(
-    Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize,
+    Debug,
+    Clone,
+    PartialEq,
+    Eq,
+    PartialOrd,
+    Ord,
+    Serialize,
+    Deserialize,
+    Diffable,
 )]
+#[daft(leaf)]
 pub struct PlatformId {
     part_number: String,
     serial_number: String,
@@ -108,9 +128,60 @@ impl PlatformId {
 }

 /// A container to make messages between trust quorum nodes routable
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Diffable)]
+#[cfg_attr(feature = "danger_partial_eq_ct_wrapper", derive(PartialEq, Eq))]
+#[daft(leaf)]
 pub struct Envelope {
     pub to: PlatformId,
     pub from: PlatformId,
     pub msg: PeerMsg,
 }
+
+/// Check if a received share is valid for a given configuration
+///
+/// Return true if valid, false otherwise.
+pub fn validate_share(
+    log: &Logger,
+    config: &Configuration,
+    from: &PlatformId,
+    epoch: Epoch,
+    share: &Share,
+) -> bool {
+    // Are we trying to retrieve shares for `epoch`?
+    if epoch != config.epoch {
+        warn!(
+            log,
+            "Received Share from node with wrong epoch";
+            "received_epoch" => %epoch,
+            "from" => %from
+        );
+        return false;
+    }
+
+    // Is the sender a member of the configuration at `epoch`?
+    let Some(expected_digest) = config.members.get(&from) else {
+        warn!(
+            log,
+            "Received Share from unexpected node";
+            "epoch" => %epoch,
+            "from" => %from
+        );
+        return false;
+    };
+
+    // Does the share hash match what we expect?
+    let mut digest = Sha3_256Digest::default();
+    share.digest::<sha3::Sha3_256>(&mut digest.0);
+    if digest != *expected_digest {
+        error!(
+            log,
+            "Received share with invalid digest";
+            "epoch" => %epoch,
+            "from" => %from
+        );
+        return false;
+    }
+
+    true
+}
diff --git a/trust-quorum/src/messages.rs b/trust-quorum/src/messages.rs
index 052a8d04a4..3167cba500 100644
--- a/trust-quorum/src/messages.rs
+++ b/trust-quorum/src/messages.rs
@@ -24,12 +24,14 @@ pub struct ReconfigureMsg {

 /// Messages sent between trust quorum members over a sprockets channel
 #[derive(Debug, Clone, Serialize, Deserialize)]
+#[cfg_attr(feature = "danger_partial_eq_ct_wrapper", derive(PartialEq, Eq))]
 pub struct PeerMsg {
     pub rack_id: RackUuid,
     pub kind: PeerMsgKind,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
+#[cfg_attr(feature = "danger_partial_eq_ct_wrapper", derive(PartialEq, Eq))]
 pub enum PeerMsgKind {
     /// Sent from a coordinator node to inform a peer about a new configuration
     Prepare {
diff --git a/trust-quorum/src/node.rs b/trust-quorum/src/node.rs
index a6613f9062..16503dbef8 100644
--- a/trust-quorum/src/node.rs
+++ b/trust-quorum/src/node.rs
@@ -23,6 +23,7 @@ use crate::{
     Alarm, Configuration, CoordinatorState, Epoch, NodeHandlerCtx, PlatformId,
     messages::*,
 };
+use daft::{Diffable, Leaf};
 use gfss::shamir::Share;
 use omicron_uuid_kinds::RackUuid;
 use slog::{Logger, error, info, o, warn};
@@ -32,7 +33,9 @@ use slog::{Logger, error, info, o, warn};
 /// This is a `sans-io` implementation that is deterministic (except for
 /// `RackSecretGeneration`, which currently hardcodes use of an OsRng). This
 /// style is primarily for testing purposes.
+#[derive(Debug, Clone, Diffable)]
 pub struct Node {
+    #[daft(ignore)]
     log: Logger,

     /// In memory state for when this node is coordinating a reconfiguration
@@ -43,6 +46,29 @@ pub struct Node {
     key_share_computer: Option<KeyShareComputer>,
 }

+// For diffs we want to allow access to all fields, but not make them public in
+// the `Node` type itself.
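+//
+// (A `Leaf` carries plain `before`/`after` references for a field diffed as
+// an atomic unit; `Option` fields diff to `Leaf<Option<&T>>`, which is how
+// the display code in test-utils detects start/stop transitions.)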
+impl NodeDiff<'_> {
+    pub fn coordinator_state(&self) -> Leaf<Option<&CoordinatorState>> {
+        self.coordinator_state
+    }
+
+    pub fn key_share_computer(&self) -> Leaf<Option<&KeyShareComputer>> {
+        self.key_share_computer
+    }
+}
+
+#[cfg(feature = "danger_partial_eq_ct_wrapper")]
+impl PartialEq for Node {
+    fn eq(&self, other: &Self) -> bool {
+        self.coordinator_state == other.coordinator_state
+            && self.key_share_computer == other.key_share_computer
+    }
+}
+
+#[cfg(feature = "danger_partial_eq_ct_wrapper")]
+impl Eq for Node {}
+
 impl Node {
     pub fn new(log: &Logger, ctx: &mut impl NodeHandlerCtx) -> Node {
         let id_str = format!("{:?}", ctx.platform_id());
@@ -288,6 +314,19 @@ impl Node {
         from: PlatformId,
         config: Configuration,
     ) {
+        // The sender sent us a configuration even though we are not part of the
+        // configuration. This is a bug on the sender's part, but doesn't rise
+        // to the level of an alarm. Log an error.
+        if !config.members.contains_key(ctx.platform_id()) {
+            error!(
+                self.log,
+                "Received CommitAdvance, but not a member of configuration";
+                "from" => %from,
+                "epoch" => %config.epoch
+            );
+            return;
+        }
+
         // We may have already advanced by the time we receive this message.
         // Let's check.
         if ctx.persistent_state().commits.contains(&config.epoch) {
@@ -328,6 +367,7 @@ impl Node {
                     config2: config.clone(),
                     from: from.clone(),
                 });
+                return;
             }
         } else {
             ctx.update_persistent_state(|ps| {
@@ -404,7 +444,7 @@ impl Node {
             }
         }

-        // We either were collectiong shares for an old epoch or haven't started
+        // We either were collecting shares for an old epoch or haven't started
        // yet.
         self.key_share_computer =
             Some(KeyShareComputer::new(&self.log, ctx, config));
@@ -435,7 +475,7 @@ impl Node {
                 info!(
                     self.log,
                     concat!(
-                        "Received 'GetShare'` from stale node. ",
+                        "Received 'GetShare' from stale node. ",
                         "Responded with 'CommitAdvance'."
                     );
                     "from" => %from,
@@ -542,6 +582,16 @@ impl Node {
             return;
         }

+        if !config.members.contains_key(ctx.platform_id()) {
+            error!(
+                self.log,
+                "Received Prepare when not a member of configuration";
+                "from" => %from,
+                "prepare_epoch" => %config.epoch
+            );
+            return;
+        }
+
         // We always save the config and share if we haven't committed a later
         // configuration. If we have seen a newer `Prepare`, it's possible
         // that that configuration will not commit, and the latest committed
@@ -568,7 +618,10 @@ impl Node {
             );
         }
         // If we are coordinating for an older epoch, then we should stop
-        // coordinating. This epoch will never commit.
+        // coordinating. The configuration at this epoch will either never
+        // commit, or has already committed without us learning about it from
+        // Nexus. In either case the rest of the system has moved on and we
+        // should stop coordinating.
         if let Some(cs) = &self.coordinator_state {
             if msg_epoch > cs.reconfigure_msg().epoch() {
                 // This prepare is for a newer configuration than the one we are
diff --git a/trust-quorum/src/node_ctx.rs b/trust-quorum/src/node_ctx.rs
index e3a4f7fed3..e7d36da7bd 100644
--- a/trust-quorum/src/node_ctx.rs
+++ b/trust-quorum/src/node_ctx.rs
@@ -6,7 +6,9 @@

 use crate::{
     Alarm, Envelope, PeerMsg, PeerMsgKind, PersistentState, PlatformId,
+    persistent_state::PersistentStateDiff,
 };
+use daft::{BTreeSetDiff, Diffable, Leaf};
 use std::collections::BTreeSet;

 /// An API shared by [`NodeCallerCtx`] and [`NodeHandlerCtx`]
@@ -67,6 +69,8 @@ pub trait NodeHandlerCtx: NodeCommonCtx {
 /// We separate access to this context via different APIs; namely [`NodeCallerCtx`]
 /// and [`NodeHandlerCtx`]. This statically prevents both the caller and
 /// [`crate::Node`] internals from performing improper mutations.
+#[derive(Debug, Clone, Diffable)]
+#[cfg_attr(feature = "danger_partial_eq_ct_wrapper", derive(PartialEq, Eq))]
 pub struct NodeCtx {
     /// The unique hardware ID of a sled
     platform_id: PlatformId,
@@ -90,6 +94,34 @@ pub struct NodeCtx {
     alarms: BTreeSet<Alarm>,
 }

+// For diffs we want to allow access to all fields, but not make them public in
+// the `NodeCtx` type itself.
+impl<'daft> NodeCtxDiff<'daft> {
+    pub fn platform_id(&self) -> Leaf<&PlatformId> {
+        self.platform_id
+    }
+
+    pub fn persistent_state(&self) -> &PersistentStateDiff<'daft> {
+        &self.persistent_state
+    }
+
+    pub fn persistent_state_changed(&self) -> Leaf<&bool> {
+        self.persistent_state_changed
+    }
+
+    pub fn outgoing(&self) -> Leaf<&[Envelope]> {
+        self.outgoing
+    }
+
+    pub fn connected(&self) -> &BTreeSetDiff<'daft, PlatformId> {
+        &self.connected
+    }
+
+    pub fn alarms(&self) -> &BTreeSetDiff<'daft, Alarm> {
+        &self.alarms
+    }
+}
+
 impl NodeCtx {
     pub fn new(platform_id: PlatformId) -> NodeCtx {
         NodeCtx {
diff --git a/trust-quorum/src/persistent_state.rs b/trust-quorum/src/persistent_state.rs
index ba6d130627..d2a9a09039 100644
--- a/trust-quorum/src/persistent_state.rs
+++ b/trust-quorum/src/persistent_state.rs
@@ -9,6 +9,7 @@
 use crate::crypto::LrtqShare;
 use crate::{Configuration, Epoch, PlatformId};
 use bootstore::schemes::v0::SharePkgCommon as LrtqShareData;
+use daft::Diffable;
 use gfss::shamir::Share;
 use iddqd::IdOrdMap;
 use omicron_uuid_kinds::{GenericUuid, RackUuid};
@@ -16,7 +17,8 @@ use serde::{Deserialize, Serialize};
 use std::collections::{BTreeMap, BTreeSet};

 /// All the persistent state for this protocol
-#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[derive(Debug, Clone, Serialize, Deserialize, Default, Diffable)]
+#[cfg_attr(feature = "danger_partial_eq_ct_wrapper", derive(PartialEq, Eq))]
 pub struct PersistentState {
     // If this node was an LRTQ node, sled-agent will start it with the ledger
     // data it read from disk. This allows us to upgrade from LRTQ.
diff --git a/trust-quorum/src/validators.rs b/trust-quorum/src/validators.rs
index aaf045d3aa..ffa361dc1f 100644
--- a/trust-quorum/src/validators.rs
+++ b/trust-quorum/src/validators.rs
@@ -7,6 +7,7 @@
 use crate::configuration::ConfigurationError;
 use crate::messages::ReconfigureMsg;
 use crate::{Epoch, PersistentStateSummary, PlatformId, Threshold};
+use daft::{BTreeSetDiff, Diffable, Leaf};
 use omicron_uuid_kinds::RackUuid;
 use slog::{Logger, error, info, warn};
 use std::collections::BTreeSet;
@@ -124,7 +125,7 @@ pub enum ReconfigurationError {
 /// A `ReconfigureMsg` that has been determined to be valid for the remainder
 /// of code paths. We encode this check into a type in a "parse, don't validate"
 /// manner.
-#[derive(Debug)]
+#[derive(Debug, Clone, PartialEq, Eq, Diffable)]
 pub struct ValidatedReconfigureMsg {
     rack_id: RackUuid,
     epoch: Epoch,
@@ -137,6 +138,34 @@ pub struct ValidatedReconfigureMsg {
     coordinator_id: PlatformId,
 }

+// For diffs we want to allow access to all fields, but not make them public in
+// the `ValidatedReconfigureMsg` type itself.
+impl<'daft> ValidatedReconfigureMsgDiff<'daft> {
+    pub fn rack_id(&self) -> Leaf<&RackUuid> {
+        self.rack_id
+    }
+
+    pub fn epoch(&self) -> Leaf<&Epoch> {
+        self.epoch
+    }
+
+    pub fn last_committed_epoch(&self) -> Leaf<Option<&'daft Epoch>> {
+        self.last_committed_epoch
+    }
+
+    pub fn members(&self) -> &BTreeSetDiff<'daft, PlatformId> {
+        &self.members
+    }
+
+    pub fn threshold(&self) -> Leaf<&Threshold> {
+        self.threshold
+    }
+
+    pub fn coordinator_id(&self) -> Leaf<&PlatformId> {
+        self.coordinator_id
+    }
+}
+
 impl PartialEq<ValidatedReconfigureMsg> for ReconfigureMsg {
     fn eq(&self, other: &ValidatedReconfigureMsg) -> bool {
         let ReconfigureMsg {
diff --git a/trust-quorum/test-utils/Cargo.toml b/trust-quorum/test-utils/Cargo.toml
new file mode 100644
index 0000000000..f2701c471a
--- /dev/null
+++ b/trust-quorum/test-utils/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "trust-quorum-test-utils"
+version = "0.1.0"
+edition = "2024"
+
+[lints]
+workspace = true
+
+[dependencies]
+camino.workspace = true
+daft.workspace = true
+dropshot.workspace = true
+gfss = { workspace = true, features = ["danger_partial_eq_ct_wrapper"] }
+iddqd.workspace = true
+omicron-uuid-kinds.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+slog.workspace = true
+trust-quorum = { workspace = true, features = ["danger_partial_eq_ct_wrapper"] }
+
+omicron-workspace-hack.workspace = true
diff --git a/trust-quorum/test-utils/src/event.rs b/trust-quorum/test-utils/src/event.rs
new file mode 100644
index 0000000000..6a573d9585
--- /dev/null
+++ b/trust-quorum/test-utils/src/event.rs
@@ -0,0 +1,33 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Events passed to our SUT/Nexus sim in both proptests and tqdb
+
+use crate::nexus::{NexusConfig, NexusReply};
+use serde::{Deserialize, Serialize};
+use std::collections::BTreeSet;
+use trust_quorum::{Epoch, PlatformId};
+
+/// An event that can be fed into our system under test (SUT)
+///
+/// Proptest-generated `Action`s get translated into events at test execution
+/// time and recorded for replay.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum Event {
+    InitialSetup {
+        member_universe_size: usize,
+        config: NexusConfig,
+        crashed_nodes: BTreeSet<PlatformId>,
+    },
+    AbortConfiguration(Epoch),
+    SendNexusReplyOnUnderlay(NexusReply),
+    /// Pull an envelope off the bootstrap network and call `Node::handle`
+    DeliverEnvelope {
+        destination: PlatformId,
+    },
+    /// Pull a `NexusReply` off the underlay network and update the `NexusState`
+    DeliverNexusReply,
+    CommitConfiguration(PlatformId),
+    Reconfigure(NexusConfig),
+}
diff --git a/trust-quorum/test-utils/src/event_log.rs b/trust-quorum/test-utils/src/event_log.rs
new file mode 100644
index 0000000000..4319513a1e
--- /dev/null
+++ b/trust-quorum/test-utils/src/event_log.rs
@@ -0,0 +1,40 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! A mechanism for recording [`crate::Event`]s
+
+use super::Event;
+use camino::Utf8Path;
+use std::fs::File;
+use std::io::{Seek, Write};
+
+pub struct EventLog {
+    file: File,
+}
+
+impl EventLog {
+    pub fn new(path: &Utf8Path) -> EventLog {
+        let mut file = File::create(path).unwrap();
+        // We want to incrementally write an array of `Event`s.
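+        // Each `record` call appends a pretty-printed `Event` followed by
+        // ",\n", and `Drop` seeks back over the final ",\n" to close the
+        // array, so a completed log parses as a JSON array (e.g. via
+        // `serde_json::from_reader::<_, Vec<Event>>`).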
+        // Start the array
+        file.write_all(b"[\n").expect("opening bracket written");
+        EventLog { file }
+    }
+
+    pub fn record(&mut self, event: &Event) {
+        serde_json::to_writer_pretty(&mut self.file, event)
+            .expect("writing event succeeded");
+        self.file.write_all(b",\n").expect("write succeeded");
+    }
+}
+
+impl Drop for EventLog {
+    fn drop(&mut self) {
+        // Back up over the trailing comma and newline
+        let _ = self.file.seek_relative(-2);
+        // Finish writing the array of events
+        let _ = self.file.write_all(b"\n]\n");
+        let _ = self.file.sync_data();
+    }
+}
diff --git a/trust-quorum/test-utils/src/lib.rs b/trust-quorum/test-utils/src/lib.rs
new file mode 100644
index 0000000000..7eccc64f5a
--- /dev/null
+++ b/trust-quorum/test-utils/src/lib.rs
@@ -0,0 +1,23 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Helpers for use by our proptests and tqdb
+
+mod event;
+mod event_log;
+pub mod nexus;
+mod state;
+
+pub use event::Event;
+pub use event_log::EventLog;
+pub use state::TqState;
+
+use trust_quorum::PlatformId;
+
+/// All possible members used in a test
+pub fn member_universe(size: usize) -> Vec<PlatformId> {
+    (0..size)
+        .map(|serial| PlatformId::new("test".into(), serial.to_string()))
+        .collect()
+}
diff --git a/trust-quorum/test-utils/src/nexus.rs b/trust-quorum/test-utils/src/nexus.rs
new file mode 100644
index 0000000000..a64acb39d0
--- /dev/null
+++ b/trust-quorum/test-utils/src/nexus.rs
@@ -0,0 +1,170 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Nexus related types for trust-quorum testing
+
+use daft::Diffable;
+use iddqd::id_ord_map::RefMut;
+use iddqd::{IdOrdItem, IdOrdMap, id_upcast};
+use omicron_uuid_kinds::RackUuid;
+use serde::{Deserialize, Serialize};
+use std::collections::BTreeSet;
+use trust_quorum::{Epoch, PlatformId, ReconfigureMsg, Threshold};
+
+// The operational state of nexus for a given configuration
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Diffable)]
+pub enum NexusOp {
+    Committed,
+    Aborted,
+    Preparing,
+}
+
+/// A single nexus configuration
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Diffable)]
+pub struct NexusConfig {
+    pub op: NexusOp,
+    pub epoch: Epoch,
+    pub last_committed_epoch: Option<Epoch>,
+    pub coordinator: PlatformId,
+    pub members: BTreeSet<PlatformId>,
+    // This is our `K` parameter
+    pub threshold: Threshold,
+
+    // This is our `Z` parameter.
+    //
+    // Nexus can commit when it has seen K+Z prepare acknowledgements
+    //
+    // Only nexus needs to know this value since it alone determines when a
+    // commit may occur.
+    pub commit_crash_tolerance: u8,
+
+    pub prepared_members: BTreeSet<PlatformId>,
+    pub committed_members: BTreeSet<PlatformId>,
+}
+
+impl NexusConfig {
+    pub fn new(
+        epoch: Epoch,
+        last_committed_epoch: Option<Epoch>,
+        coordinator: PlatformId,
+        members: BTreeSet<PlatformId>,
+        threshold: Threshold,
+    ) -> NexusConfig {
+        // We want a few extra nodes beyond `threshold` to ack before we commit.
+        // This is the number of nodes that can go offline while still allowing
+        // an unlock to occur.
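+        //
+        // (For example: with 5 members and threshold K = 3,
+        // members.len() - K = 2, so Z = 1 and nexus can commit once it has
+        // seen K + Z = 4 prepare acks.)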
+        let commit_crash_tolerance = match members.len() - threshold.0 as usize
+        {
+            0..=1 => 0,
+            2..=4 => 1,
+            5..=7 => 2,
+            _ => 3,
+        };
+        NexusConfig {
+            op: NexusOp::Preparing,
+            epoch,
+            last_committed_epoch,
+            coordinator,
+            members,
+            threshold,
+            commit_crash_tolerance,
+            prepared_members: BTreeSet::new(),
+            committed_members: BTreeSet::new(),
+        }
+    }
+
+    pub fn to_reconfigure_msg(&self, rack_id: RackUuid) -> ReconfigureMsg {
+        ReconfigureMsg {
+            rack_id,
+            epoch: self.epoch,
+            last_committed_epoch: self.last_committed_epoch,
+            members: self.members.clone(),
+            threshold: self.threshold,
+        }
+    }
+
+    // Are there enough prepared members to commit?
+    pub fn can_commit(&self) -> bool {
+        self.prepared_members.len()
+            >= (self.threshold.0 + self.commit_crash_tolerance) as usize
+    }
+}
+
+impl IdOrdItem for NexusConfig {
+    type Key<'a> = Epoch;
+
+    fn key(&self) -> Self::Key<'_> {
+        self.epoch
+    }
+
+    id_upcast!();
+}
+
+/// A model of Nexus's view of the world during the test
+#[derive(Debug, Clone, Diffable)]
+pub struct NexusState {
+    // No reason to change the rack_id
+    pub rack_id: RackUuid,
+
+    pub configs: IdOrdMap<NexusConfig>,
+}
+
+impl NexusState {
+    #[allow(clippy::new_without_default)]
+    pub fn new() -> NexusState {
+        NexusState { rack_id: RackUuid::new_v4(), configs: IdOrdMap::new() }
+    }
+
+    // Create a `ReconfigureMsg` for the latest nexus config
+    pub fn reconfigure_msg_for_latest_config(
+        &self,
+    ) -> (&PlatformId, ReconfigureMsg) {
+        let config = self.configs.iter().last().expect("at least one config");
+        (&config.coordinator, config.to_reconfigure_msg(self.rack_id))
+    }
+
+    /// Abort the latest reconfiguration attempt
+    pub fn abort_reconfiguration(&mut self) {
+        let config = self.configs.iter().last().expect("at least one config");
+        // Can only abort while preparing
+        assert_eq!(config.op, NexusOp::Preparing);
+    }
+
+    pub fn latest_config(&self) -> &NexusConfig {
+        self.configs.iter().last().expect("at least one config")
+    }
+
+    pub fn latest_config_mut(&mut self) -> RefMut<'_, NexusConfig> {
+        self.configs.iter_mut().last().expect("at least one config")
+    }
+
+    pub fn last_committed_config(&self) -> Option<&NexusConfig> {
+        // IdOrdMap doesn't allow reverse iteration.
+        // We therefore iterate through all configs to find the latest committed
+        // one. We could track this out of band but that leaves more room for
+        // error.
+        let mut found: Option<&NexusConfig> = None;
+        for c in &self.configs {
+            if c.op == NexusOp::Committed {
+                found = Some(c)
+            }
+        }
+        found
+    }
+}
+
+#[derive(
+    Debug,
+    Clone,
+    PartialOrd,
+    Ord,
+    PartialEq,
+    Eq,
+    Serialize,
+    Deserialize,
+    Diffable,
+)]
+pub enum NexusReply {
+    AckedPreparesFromCoordinator { epoch: Epoch, acks: BTreeSet<PlatformId> },
+    CommitAck { from: PlatformId, epoch: Epoch },
+}
diff --git a/trust-quorum/test-utils/src/state.rs b/trust-quorum/test-utils/src/state.rs
new file mode 100644
index 0000000000..35ae9f13e8
--- /dev/null
+++ b/trust-quorum/test-utils/src/state.rs
@@ -0,0 +1,888 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! The entire state of our test system
+
+use crate::nexus::{
+    NexusConfig, NexusOp, NexusReply, NexusState, NexusStateDiff,
+};
+use crate::{Event, member_universe};
+use daft::{BTreeMapDiff, BTreeSetDiff, Diffable, Leaf};
+use iddqd::IdOrdMap;
+use slog::Logger;
+use std::collections::{BTreeMap, BTreeSet};
+use std::fmt::Display;
+use trust_quorum::{
+    Configuration, CoordinatorOperation, CoordinatorStateDiff, Envelope, Epoch,
+    Node, NodeCallerCtx, NodeCommonCtx, NodeCtx, NodeCtxDiff, NodeDiff,
+    PeerMsgKind, PlatformId, ValidatedReconfigureMsgDiff,
+};
+
+// The state of our entire system including the system under test and
+// test-specific infrastructure.
+#[derive(Debug, Clone, Diffable)]
+pub struct TqState {
+    /// A logger for our test
+    #[daft(ignore)]
+    pub log: Logger,
+
+    /// Our system under test
+    pub sut: Sut,
+
+    /// All in-flight messages between nodes
+    pub bootstrap_network: BTreeMap<PlatformId, Vec<Envelope>>,
+
+    /// All in-flight responses to nexus. We don't model the requests, as those
+    /// are `Node` public method calls. But we don't want to synchronously
+    /// update nexus state as a result of those calls, because that ruins any
+    /// possible interleaving with other actions.
+    ///
+    /// This is a way to allow interleaving of nexus replies without changing
+    /// the Node API to accept a separate set of Nexus messages and return
+    /// messages. We may decide that we want to do that, but for now we'll stick
+    /// with a concrete `Node` method-based API that is "triggered" by nexus
+    /// messages.
+    pub underlay_network: Vec<NexusReply>,
+
+    /// A model of Nexus's view of the world during the test
+    pub nexus: NexusState,
+
+    /// A cache of our member universe, so we only have to generate it once
+    pub member_universe: Vec<PlatformId>,
+
+    /// All possible system faults in our test
+    pub faults: Faults,
+
+    /// All configurations ever generated by a coordinator.
+    ///
+    /// If an epoch got skipped due to a crashed coordinator then there will not
+    /// be a configuration for that epoch.
+    pub all_coordinated_configs: IdOrdMap<Configuration>,
+
+    /// Expunged nodes cannot be added to a cluster. We never reuse nodes in
+    /// this test. We include nodes here that may not know yet that they have
+    /// been expunged in the `Sut`.
+    pub expunged: BTreeSet<PlatformId>,
+}
+
+impl TqState {
+    pub fn new(log: Logger) -> TqState {
+        // We'll fill this in when applying the initial_config
+        let sut = Sut::empty();
+        let member_universe = vec![];
+        TqState {
+            log,
+            sut,
+            bootstrap_network: BTreeMap::new(),
+            underlay_network: Vec::new(),
+            nexus: NexusState::new(),
+            member_universe,
+            faults: Faults::default(),
+            all_coordinated_configs: IdOrdMap::new(),
+            expunged: BTreeSet::new(),
+        }
+    }
+
+    /// Send the latest `ReconfigureMsg` from `Nexus` to the coordinator node
+    ///
+    /// If the node is not available, then abort the configuration at nexus
+    pub fn send_reconfigure_msg(&mut self) {
+        let (coordinator, msg) = self.nexus.reconfigure_msg_for_latest_config();
+        let epoch_to_config = msg.epoch;
+        if self.faults.crashed_nodes.contains(coordinator) {
+            // We must abort the configuration. This mimics a timeout.
+            self.nexus.abort_reconfiguration();
+        } else {
+            let (node, ctx) = self
+                .sut
+                .nodes
+                .get_mut(coordinator)
+                .expect("coordinator exists");
+
+            node.coordinate_reconfiguration(ctx, msg)
+                .expect("valid configuration");
+
+            // Do we have a `Configuration` for this epoch yet?
+            //
+            // For most reconfigurations, shares for the last committed
+            // configuration must be retrieved before the configuration is
+            // generated and saved in the persistent state.
+            let latest_persisted_config =
+                ctx.persistent_state().latest_config().expect("config exists");
+            if latest_persisted_config.epoch == epoch_to_config {
+                // Save the configuration for later
+                self.all_coordinated_configs
+                    .insert_unique(latest_persisted_config.clone())
+                    .expect("unique");
+            }
+        }
+    }
+
+    /// Check postcondition assertions after initial configuration
+    pub fn postcondition_initial_configuration(&mut self) {
+        let (coordinator, msg) = self.nexus.reconfigure_msg_for_latest_config();
+
+        // The coordinator should have received the `ReconfigureMsg` from Nexus
+        if !self.faults.crashed_nodes.contains(coordinator) {
+            let (node, ctx) = self
+                .sut
+                .nodes
+                .get_mut(coordinator)
+                .expect("coordinator exists");
+            let mut connected_members = 0;
+            // The coordinator should start preparing by sending a `Prepare`
+            // message to all connected nodes in the membership set.
+            for member in
+                msg.members.iter().filter(|&id| id != coordinator).cloned()
+            {
+                if self.faults.is_connected(coordinator.clone(), member.clone())
+                {
+                    connected_members += 1;
+                    let msg_found = ctx.envelopes().any(|envelope| {
+                        envelope.to == member
+                            && envelope.from == *coordinator
+                            && matches!(
+                                envelope.msg.kind,
+                                PeerMsgKind::Prepare { .. }
+                            )
+                    });
+                    assert!(msg_found);
+                }
+            }
+            assert_eq!(connected_members, ctx.envelopes().count());
+
+            // The coordinator should be in the prepare phase
+            let cs = node.get_coordinator_state().expect("is coordinating");
+            assert!(matches!(cs.op(), CoordinatorOperation::Prepare { .. }));
+
+            // The persistent state should have changed
+            assert!(ctx.persistent_state_change_check_and_reset());
+            assert!(ctx.persistent_state().has_prepared(msg.epoch));
+            assert!(ctx.persistent_state().latest_committed_epoch().is_none());
+        }
+    }
+
+    /// Put any outgoing coordinator messages from the latest configuration on
+    /// the wire
+    pub fn send_envelopes_from_coordinator(&mut self) {
+        let coordinator = {
+            let (coordinator, _) =
+                self.nexus.reconfigure_msg_for_latest_config();
+            coordinator.clone()
+        };
+        self.send_envelopes_from(&coordinator);
+    }
+
+    pub fn send_envelopes_from(&mut self, id: &PlatformId) {
+        let (_, ctx) = self.sut.nodes.get_mut(id).expect("node exists");
+        for envelope in ctx.drain_envelopes() {
+            let msgs =
+                self.bootstrap_network.entry(envelope.to.clone()).or_default();
+            msgs.push(envelope);
+        }
+    }
+
+    pub fn apply_event(&mut self, event: Event) {
+        match event {
+            Event::InitialSetup {
+                member_universe_size,
+                config,
+                crashed_nodes,
+            } => {
+                self.apply_event_initial_config(
+                    member_universe_size,
+                    config,
+                    crashed_nodes,
+                );
+            }
+            Event::AbortConfiguration(epoch) => {
+                self.apply_event_abort_configuration(epoch)
+            }
+            Event::SendNexusReplyOnUnderlay(reply) => {
+                self.apply_event_send_nexus_reply_on_underlay(reply)
+            }
+            Event::DeliverEnvelope { destination } => {
+                self.apply_event_deliver_envelope(destination);
+            }
+            Event::DeliverNexusReply => {
+                self.apply_event_deliver_nexus_reply();
+            }
+            Event::CommitConfiguration(dest) => {
+                self.apply_event_commit(dest);
+            }
+            Event::Reconfigure(nexus_config) => {
+                self.apply_event_reconfigure(nexus_config)
+            }
+        }
+    }
+
+    fn apply_event_initial_config(
+        &mut self,
+        member_universe_size: usize,
+        config: NexusConfig,
+        crashed_nodes: BTreeSet<PlatformId>,
+    ) {
+        // Generate the member universe
+        self.member_universe = member_universe(member_universe_size);
+        // Create the SUT nodes
+        self.sut = Sut::new(&self.log, self.member_universe.clone());
+
+        self.faults.crashed_nodes = crashed_nodes;
+
+        // Inform nexus about the initial configuration
+        self.nexus.configs.insert_unique(config).expect("new config");
+
+        // Establish bootstrap network connections between live nodes
+        for (from, (node, ctx)) in self
+            .sut
+            .nodes
+            .iter_mut()
+            .filter(|(id, _)| !self.faults.crashed_nodes.contains(id))
+        {
+            for to in self.member_universe.iter().filter(|id| {
+                !self.faults.crashed_nodes.contains(id) && from != *id
+            }) {
+                node.on_connect(ctx, to.clone());
+            }
+        }
+
+        self.send_reconfigure_msg();
+
+        // Check the results of the initial setup
+        self.postcondition_initial_configuration();
+
+        // Put the coordinator's outgoing messages on the wire if there are any
+        self.send_envelopes_from_coordinator();
+    }
+
+    fn apply_event_commit(&mut self, id: PlatformId) {
+        let rack_id = self.nexus.rack_id;
+        let latest_config = self.nexus.latest_config();
+        let (node, ctx) =
+            self.sut.nodes.get_mut(&id).expect("destination exists");
+        node.commit_configuration(ctx, rack_id, latest_config.epoch)
+            .expect("commit succeeded");
+
+        self.underlay_network.push(NexusReply::CommitAck {
+            from: id,
+            epoch: latest_config.epoch,
+        });
+    }
+
+    fn apply_event_send_nexus_reply_on_underlay(&mut self, reply: NexusReply) {
+        self.underlay_network.push(reply);
+    }
+
+    fn apply_event_deliver_nexus_reply(&mut self) {
+        let mut latest_config = self.nexus.latest_config_mut();
+        let reply = self.underlay_network.pop().expect("reply exists");
+        match reply {
+            NexusReply::AckedPreparesFromCoordinator { epoch, acks } => {
+                if epoch == latest_config.epoch {
+                    latest_config.prepared_members.extend(acks);
+                }
+            }
+            NexusReply::CommitAck { from, epoch } => {
+                if latest_config.epoch == epoch {
+                    latest_config.committed_members.insert(from);
+                }
+            }
+        }
+    }
+
+    fn apply_event_abort_configuration(&mut self, epoch: Epoch) {
+        let mut latest_config = self.nexus.latest_config_mut();
+        assert_eq!(epoch, latest_config.epoch);
+        latest_config.op = NexusOp::Aborted;
+    }
+
+    fn apply_event_deliver_envelope(&mut self, destination: PlatformId) {
+        let envelope = self
+            .bootstrap_network
+            .get_mut(&destination)
+            .unwrap()
+            .pop()
+            .expect("envelope in bootstrap network");
+        let (node, ctx) =
+            self.sut.nodes.get_mut(&envelope.to).expect("destination exists");
+        node.handle(ctx, envelope.from, envelope.msg);
+
+        // If this is the first time we've seen a configuration, track it
+        //
+        // We have to do this here because for reconfigurations, shares
+        // for the last committed reconfiguration are gathered before
+        // the config is created. We don't know exactly when config
+        // generation occurs, but we know that it happens after envelopes
+        // are delivered, except for configurations that don't have
+        // a last committed config. This is normally the initial
+        // configuration, but can be later ones if the initial config
+        // is aborted.
+        if ctx.persistent_state_change_check_and_reset() {
+            if let Some(latest_config) = ctx.persistent_state().latest_config()
+            {
+                if !self
+                    .all_coordinated_configs
+                    .contains_key(&latest_config.epoch)
+                {
+                    // The coordinator must be the first node to create
+                    // the configuration.
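+                    //
+                    // (Other nodes only learn of a configuration from
+                    // `Prepare` or `CommitAdvance` messages, which originate
+                    // with the coordinator.)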
+                    assert_eq!(&latest_config.coordinator, ctx.platform_id());
+
+                    self.all_coordinated_configs
+                        .insert_unique(latest_config.clone())
+                        .expect("unique config");
+                }
+            }
+        }
+
+        // Send any messages as a result of handling this message
+        send_envelopes(ctx, &mut self.bootstrap_network);
+
+        // Remove any destinations with zero messages in-flight
+        self.bootstrap_network.retain(|_, msgs| !msgs.is_empty());
+    }
+
+    fn apply_event_reconfigure(&mut self, nexus_config: NexusConfig) {
+        self.nexus.configs.insert_unique(nexus_config).expect("new config");
+        self.send_reconfigure_msg();
+        self.send_envelopes_from_coordinator();
+    }
+}
+
+/// Broken out of `TqState` to alleviate borrow checker woes
+fn send_envelopes(
+    ctx: &mut NodeCtx,
+    bootstrap_network: &mut BTreeMap<PlatformId, Vec<Envelope>>,
+) {
+    for envelope in ctx.drain_envelopes() {
+        let envelopes =
+            bootstrap_network.entry(envelope.to.clone()).or_default();
+        envelopes.push(envelope);
+    }
+}
+
+/// The system under test
+///
+/// This is our real code.
+#[derive(Debug, Clone, Diffable)]
+pub struct Sut {
+    /// All nodes in the member universe
+    pub nodes: BTreeMap<PlatformId, (Node, NodeCtx)>,
+}
+
+impl Sut {
+    pub fn empty() -> Sut {
+        Sut { nodes: BTreeMap::new() }
+    }
+
+    pub fn new(log: &Logger, universe: Vec<PlatformId>) -> Sut {
+        let nodes = universe
+            .into_iter()
+            .map(|id| {
+                let mut ctx = NodeCtx::new(id.clone());
+                let node = Node::new(log, &mut ctx);
+                (id, (node, ctx))
+            })
+            .collect();
+        Sut { nodes }
+    }
+}
+
+/// Faults in our system. It's useful to keep these self-contained and not
+/// in separate fields in `TqState` so that we can access them all at once
+/// independently of other `TqState` fields.
+#[derive(Default, Debug, Clone, Diffable)]
+pub struct Faults {
+    // We allow nodes to crash and restart and therefore track crashed nodes
+    // here.
+    //
+    // A crashed node is implicitly disconnected from every other node. We don't
+    // bother storing the pairs in `disconnected_nodes`, but instead check both
+    // fields when necessary.
+    pub crashed_nodes: BTreeSet<PlatformId>,
+
+    /// The set of disconnected nodes
+    pub disconnected_nodes: DisconnectedNodes,
+}
+
+impl Faults {
+    pub fn is_connected(&self, node1: PlatformId, node2: PlatformId) -> bool {
+        !self.crashed_nodes.contains(&node1)
+            && !self.crashed_nodes.contains(&node2)
+            && !self.disconnected_nodes.contains(node1, node2)
+    }
+}
+
+/// For cardinality purposes, we assume all nodes are connected and explicitly
+/// disconnect some of them. This allows us to track and compare much less
+/// data.
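+///
+/// (Pairs are stored sorted on insert, so `insert(a, b)` and `contains(b, a)`
+/// refer to the same entry.)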
+#[derive(Default, Debug, Clone, Diffable)]
+pub struct DisconnectedNodes {
+    // We sort each pair on insert for quick lookups
+    pairs: BTreeSet<(PlatformId, PlatformId)>,
+}
+
+impl DisconnectedNodes {
+    // Return true if the pair is newly inserted
+    pub fn insert(&mut self, node1: PlatformId, node2: PlatformId) -> bool {
+        assert_ne!(node1, node2);
+
+        let pair = if node1 < node2 { (node1, node2) } else { (node2, node1) };
+        self.pairs.insert(pair)
+    }
+
+    // Return true if the pair of nodes is disconnected, false otherwise
+    pub fn contains(&self, node1: PlatformId, node2: PlatformId) -> bool {
+        assert_ne!(node1, node2);
+        let pair = if node1 < node2 { (node1, node2) } else { (node2, node1) };
+        self.pairs.contains(&pair)
+    }
+}
+
+/*****************************************************************************
+ *
+ * Diff related display code
+ *
+ *****************************************************************************/
+
+/// Diff Display functionality for `TqState`
+///
+/// All diff-related code lives in `test-utils`, because we enable the
+/// trust-quorum feature `danger_partial_eq_ct_wrapper` in this crate. We
+/// don't enable it for all uses of the `trust_quorum` crate, especially in
+/// production.
+///
+/// Since we only use it for human-readable output in test tools, at least for
+/// now, we put it behind a feature flag and implement all display
+/// functionality here.
+impl Display for TqStateDiff<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // The set of SUT nodes never changes
+        for (&id, &leaf) in self.sut.nodes.common.iter() {
+            if leaf.is_modified() {
+                writeln!(f, "Node changed: {id}")?;
+                let (node_diff, ctx_diff) = leaf.diff_pair();
+                display_node_diff(node_diff, f)?;
+                display_node_ctx_diff(ctx_diff, f)?;
+
+                // Add a blank line between modified nodes
+                writeln!(f)?;
+            }
+        }
+
+        display_bootstrap_network_diff(&self.bootstrap_network, f)?;
+        display_underlay_network_diff(&self.underlay_network, f)?;
+        display_nexus_state_diff(&self.nexus, f)?;
+        display_faults_diff(&self.faults, f)?;
+        display_expunged_diff(&self.expunged, f)?;
+
+        Ok(())
+    }
+}
+
+fn display_expunged_diff(
+    diff: &BTreeSetDiff<'_, PlatformId>,
+    f: &mut std::fmt::Formatter<'_>,
+) -> std::fmt::Result {
+    if !diff.added.is_empty() {
+        writeln!(f, "expunged nodes:")?;
+        for id in &diff.added {
+            writeln!(f, "    {id}")?;
+        }
+    }
+    Ok(())
+}
+
+fn display_faults_diff(
+    diff: &FaultsDiff<'_>,
+    f: &mut std::fmt::Formatter<'_>,
+) -> std::fmt::Result {
+    if !diff.crashed_nodes.added.is_empty() {
+        writeln!(f, "  nodes crashed:")?;
+        for id in &diff.crashed_nodes.added {
+            writeln!(f, "    {id}")?;
+        }
+    }
+    if !diff.crashed_nodes.removed.is_empty() {
+        writeln!(f, "  nodes started:")?;
+        for id in &diff.crashed_nodes.removed {
+            writeln!(f, "    {id}")?;
+        }
+    }
+
+    if !diff.disconnected_nodes.pairs.added.is_empty() {
+        writeln!(f, "  nodes disconnected from each other:")?;
+        for pair in &diff.disconnected_nodes.pairs.added {
+            writeln!(f, "    {}, {}", pair.0, pair.1)?;
+        }
+    }
+    if !diff.disconnected_nodes.pairs.removed.is_empty() {
+        writeln!(f, "  nodes connected to each other:")?;
+        for pair in &diff.disconnected_nodes.pairs.removed {
+            writeln!(f, "    {}, {}", pair.0, pair.1)?;
+        }
+    }
+    Ok(())
+}
+
+fn display_nexus_state_diff(
+    diff: &NexusStateDiff<'_>,
+    f: &mut std::fmt::Formatter<'_>,
+) -> std::fmt::Result {
+    if diff.configs.modified().count() != 0 {
+        writeln!(f, "  nexus state changed:")?;
+    }
+
+    // Nexus configs can only be added or modified
+    for c in &diff.configs.added {
+        writeln!(f, "    config added at epoch {}, op: {:?}", c.epoch, c.op)?;
+    }
+    for c in diff.configs.modified_diff() {
+        writeln!(f, "    config modified at epoch {}", c.epoch.before)?;
+        if c.op.is_modified() {
+            let op = c.op.diff_pair();
+            writeln!(f, "      op changed: {:?} -> {:?}", op.before, op.after)?;
+        }
+        for id in c.prepared_members.added {
+            writeln!(f, "      new prepare ack received: {id}")?;
+        }
+        for id in c.committed_members.added {
+            writeln!(f, "      new commit ack received: {id}")?;
+        }
+    }
+
+    Ok(())
+}
+
+fn display_underlay_network_diff(
+    diff: &Leaf<&[NexusReply]>,
+    f: &mut std::fmt::Formatter<'_>,
+) -> std::fmt::Result {
+    if diff.is_unchanged() {
+        return Ok(());
+    }
+
+    let before: BTreeSet<_> = diff.before.iter().collect();
+    let after: BTreeSet<_> = diff.after.iter().collect();
+
+    let added = after.difference(&before).count();
+    let removed = before.difference(&after).count();
+
+    writeln!(f, "  {added} new nexus replies in flight on underlay network")?;
+    writeln!(
+        f,
+        "  {removed} nexus replies delivered to nexus from underlay network",
+    )?;
+
+    Ok(())
+}
+
+fn display_bootstrap_network_diff(
+    diff: &BTreeMapDiff<'_, PlatformId, Vec<Envelope>>,
+    f: &mut std::fmt::Formatter<'_>,
+) -> std::fmt::Result {
+    if !diff.added.is_empty() {
+        writeln!(f, "  messages newly in flight on bootstrap network:")?;
+        for id in diff.added.keys() {
+            writeln!(f, "    destination: {id}")?;
+        }
+    }
+
+    if !diff.removed.is_empty() {
+        writeln!(f, "  all messages delivered from bootstrap network:")?;
+        for id in diff.removed.keys() {
+            writeln!(f, "    destination: {id}")?;
+        }
+    }
+
+    if diff.unchanged_keys().count() != 0 {
+        writeln!(f, "  messages remain in flight from bootstrap network:")?;
+        for id in diff.unchanged_keys() {
+            writeln!(f, "    destination: {id}")?;
+        }
+    }
+    Ok(())
+}
+
+// Walk a `NodeCtxDiff` and format it for display
+fn display_node_ctx_diff(
+    diff: NodeCtxDiff<'_>,
+    f: &mut std::fmt::Formatter<'_>,
+) -> std::fmt::Result {
+    if !diff.persistent_state().configs.added.is_empty() {
+        writeln!(f, "  config added to persistent state: ")?;
+        for c in &diff.persistent_state().configs.added {
+            writeln!(f, "    epoch: {}", c.epoch)?;
+        }
+    }
+    if !diff.persistent_state().configs.removed.is_empty() {
+        writeln!(f, "  config removed from persistent state: ")?;
+        for c in &diff.persistent_state().configs.removed {
+            writeln!(f, "    epoch: {}", c.epoch)?;
+        }
+    }
+
+    if !diff.persistent_state().shares.added.is_empty() {
+        writeln!(f, "  our share added to persistent state: ")?;
+        for e in diff.persistent_state().shares.added.keys() {
+            writeln!(f, "    epoch: {e}")?;
+        }
+    }
+    if !diff.persistent_state().shares.removed.is_empty() {
+        writeln!(f, "  our share removed from persistent state: ")?;
+        for e in diff.persistent_state().shares.removed.keys() {
+            writeln!(f, "    epoch: {e}")?;
+        }
+    }
+
+    if !diff.persistent_state().commits.added.is_empty() {
+        writeln!(f, "  commit added to persistent state: ")?;
+        for e in &diff.persistent_state().commits.added {
+            writeln!(f, "    epoch: {e}")?;
+        }
+    }
+    if !diff.persistent_state().commits.removed.is_empty() {
+        writeln!(f, "  commit removed from persistent state: ")?;
+        for e in &diff.persistent_state().commits.removed {
+            writeln!(f, "    epoch: {e}")?;
+        }
+    }
+
+    if diff.outgoing().is_modified() {
+        writeln!(f, "  messages sent to or delivered from bootstrap network")?;
+    }
+
+    if !diff.connected().added.is_empty() {
+        writeln!(f, "  nodes connected:")?;
+        for id in &diff.connected().added {
+            writeln!(f, "    {id}")?;
+        }
+    }
+
+    if
!diff.connected().removed.is_empty() { + writeln!(f, " nodes disconnected:")?; + for id in &diff.connected().removed { + writeln!(f, " {id}")?; + } + } + + if !diff.alarms().added.is_empty() { + writeln!(f, " alarms triggered:")?; + for alarm in &diff.alarms().added { + writeln!(f, " {alarm:?}")?; + } + } + + if !diff.alarms().removed.is_empty() { + writeln!(f, " alarms cleared:")?; + for alarm in &diff.alarms().removed { + writeln!(f, " {alarm:?}")?; + } + } + + Ok(()) +} + +// Walk a `NodeDiff` and format it for display +fn display_node_diff( + node_diff: NodeDiff<'_>, + f: &mut std::fmt::Formatter<'_>, +) -> std::fmt::Result { + // Show changes in `Node::coordinator_state` + if node_diff.coordinator_state().is_modified() { + writeln!(f, " coordinator state changed: ")?; + if node_diff.coordinator_state().before.is_none() { + writeln!( + f, + " started coordinating at epoch {}", + node_diff + .coordinator_state() + .after + .unwrap() + .reconfigure_msg() + .epoch() + )?; + } else if node_diff.coordinator_state().after.is_none() { + writeln!( + f, + " stopped coordinating at epoch {}", + node_diff + .coordinator_state() + .before + .unwrap() + .reconfigure_msg() + .epoch() + )?; + } else { + let before = node_diff.coordinator_state().before.unwrap(); + let after = node_diff.coordinator_state().after.unwrap(); + + // They are both `Some`, so figure out what changed + // by recursing + let diff = before.diff(after); + display_coordinator_state_diff(diff, f)?; + } + } + + // Show changes in `Node::key_share_computer` + if node_diff.key_share_computer().is_modified() { + writeln!(f, " key share computer changed: ")?; + if node_diff.key_share_computer().before.is_none() { + writeln!( + f, + " started computing key share at epoch {}", + node_diff.key_share_computer().after.unwrap().config().epoch + )?; + } else if node_diff.key_share_computer().after.is_none() { + writeln!( + f, + " stopped computing key share at epoch {}", + node_diff.key_share_computer().before.unwrap().config().epoch + )?; + } else { + writeln!( + f, + " computing key share at epochs: {} -> {}", + node_diff.key_share_computer().before.unwrap().config().epoch, + node_diff.key_share_computer().after.unwrap().config().epoch + )?; + } + } + + Ok(()) +} + +pub fn display_coordinator_state_diff( + diff: CoordinatorStateDiff<'_>, + f: &mut std::fmt::Formatter<'_>, +) -> std::fmt::Result { + display_validated_reconfigure_msg_diff(diff.reconfigure_msg(), f)?; + + // Configuration contains roughly the same information as a + // `ValidatedReconfigureMsg`. Let's report the only relevant change. + if diff.configuration().encrypted_rack_secrets.is_modified() { + writeln!(f, " encrypted rack secrets changed")?; + } + + display_coordinator_operation_diff(diff.op().diff_pair(), f)?; + + Ok(()) +} + +pub fn display_validated_reconfigure_msg_diff( + diff: &ValidatedReconfigureMsgDiff<'_>, + f: &mut std::fmt::Formatter<'_>, +) -> std::fmt::Result { + // diff.rack_id changes when tqdb `rewind` command is used, which makes it + // confusing. It never changes inside tests, so no need to diff it. 
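+    //
+    // As an aside: each scalar field below follows the same daft `Leaf`
+    // pattern. A minimal sketch of that pattern, assuming `Leaf` exposes
+    // public `before`/`after` fields as it does elsewhere in this file:
+    //
+    //     let (before, after) = (Epoch(1), Epoch(2));
+    //     let leaf = Leaf { before: &before, after: &after };
+    //     assert!(leaf.is_modified()); // before != after, so print it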
+ if diff.epoch().is_modified() { + writeln!( + f, + " epoch: {} -> {}", + diff.epoch().before, + diff.epoch().after + )?; + } + if diff.last_committed_epoch().is_modified() { + writeln!( + f, + " last committed epoch: {:?} -> {:?}", + diff.last_committed_epoch().before, + diff.last_committed_epoch().after + )?; + } + if !diff.members().added.is_empty() { + writeln!(f, " added members:")?; + for member in &diff.members().added { + writeln!(f, " {member}")?; + } + } + if !diff.members().removed.is_empty() { + writeln!(f, " removed members:")?; + for member in &diff.members().removed { + writeln!(f, " {member}")?; + } + } + if diff.threshold().is_modified() { + writeln!( + f, + " threshold: {} -> {}", + diff.threshold().before, + diff.threshold().after + )?; + } + // Always write out the coordinator id. It's useful for digging. + writeln!( + f, + " coordinator: {} -> {}", + diff.coordinator_id().before, + diff.coordinator_id().after, + )?; + + Ok(()) +} + +pub fn display_coordinator_operation_diff( + diff: Leaf<&CoordinatorOperation>, + f: &mut std::fmt::Formatter<'_>, +) -> std::fmt::Result { + if diff.is_unchanged() { + return Ok(()); + } + + // If the same variant changed contents, compare them. Otherwise report the + // change in variants. + match (diff.before, diff.after) { + ( + CoordinatorOperation::CollectShares { + old_epoch, + old_collected_shares, + .. + }, + CoordinatorOperation::CollectShares { + old_epoch: after_old_epoch, + old_collected_shares: after_old_collected_shares, + .. + }, + ) => { + // If the collection epoch changed, then only report that + if old_epoch != after_old_epoch { + #[allow(clippy::uninlined_format_args)] + writeln!( + f, + " collecting shares: epoch changed: {} -> {}", + old_epoch, after_old_epoch + )?; + } else if old_collected_shares != after_old_collected_shares { + writeln!( + f, + " collected shares changed at epoch: {old_epoch}", + )?; + } + } + ( + CoordinatorOperation::CollectLrtqShares { shares: before, .. }, + CoordinatorOperation::CollectLrtqShares { shares: after, .. }, + ) => { + if before != after { + writeln!(f, " collected lrtq shares differ")?; + } + } + ( + CoordinatorOperation::Prepare { prepare_acks: before, .. }, + CoordinatorOperation::Prepare { prepare_acks: after, .. }, + ) => { + if before != after { + writeln!(f, " received prepare acks differ")?; + } + } + (before, after) => { + writeln!( + f, + " coordinator operation changed: {} -> {}", + before.name(), + after.name() + )?; + } + } + + Ok(()) +} diff --git a/trust-quorum/tests/cluster.rs b/trust-quorum/tests/cluster.rs index 9bc7da94c6..c4ddd620da 100644 --- a/trust-quorum/tests/cluster.rs +++ b/trust-quorum/tests/cluster.rs @@ -5,10 +5,8 @@ //! 
Property based test driving multiple trust quorum nodes use daft::Diffable; -use iddqd::id_ord_map::RefMut; -use iddqd::{IdOrdItem, IdOrdMap, id_upcast}; +use dropshot::test_util::log_prefix_for_test; use omicron_test_utils::dev::test_setup_log; -use omicron_uuid_kinds::RackUuid; use prop::sample::Index; use proptest::collection::{btree_set, size_range}; use proptest::prelude::*; @@ -17,265 +15,18 @@ use slog::{Logger, info, o}; use std::collections::{BTreeMap, BTreeSet}; use test_strategy::{Arbitrary, proptest}; use trust_quorum::{ - Configuration, CoordinatorOperation, Envelope, Epoch, Node, NodeCallerCtx, - NodeCommonCtx, NodeCtx, PeerMsgKind, PlatformId, ReconfigureMsg, Threshold, + CoordinatorOperation, Epoch, NodeCommonCtx, PlatformId, Threshold, +}; +use trust_quorum_test_utils::TqState; +use trust_quorum_test_utils::{ + Event, EventLog, + nexus::{NexusConfig, NexusOp, NexusReply}, }; - -/// The system under test -/// -/// This is our real code. -pub struct Sut { - /// All nodes in the member universe - pub nodes: BTreeMap, -} - -impl Sut { - pub fn new(log: &Logger, universe: Vec) -> Sut { - let nodes = universe - .into_iter() - .map(|id| { - let mut ctx = NodeCtx::new(id.clone()); - let node = Node::new(log, &mut ctx); - (id, (node, ctx)) - }) - .collect(); - Sut { nodes } - } -} - -#[derive(Debug, PartialEq, Eq)] -pub enum NexusOp { - Committed, - Aborted, - Preparing, -} - -/// A single nexus configuration -#[derive(Debug)] -pub struct NexusConfig { - op: NexusOp, - epoch: Epoch, - last_committed_epoch: Option, - coordinator: PlatformId, - members: BTreeSet, - // This is our `K` parameter - threshold: Threshold, - - // This is our `Z` parameter. - // - // Nexus can commit when it has seen K+Z prepare acknowledgements - // - // Only nexus needs to know this value since it alone determines when a - // commit may occur. - commit_crash_tolerance: u8, - - prepared_members: BTreeSet, - committed_members: BTreeSet, -} - -impl NexusConfig { - pub fn new( - epoch: Epoch, - last_committed_epoch: Option, - coordinator: PlatformId, - members: BTreeSet, - threshold: Threshold, - ) -> NexusConfig { - // We want a few extra nodes beyond `threshold` to ack before we commit. - // This is the number of nodes that can go offline while still allowing - // an unlock to occur. - let commit_crash_tolerance = match members.len() - threshold.0 as usize - { - 0..=1 => 0, - 2..=4 => 1, - 5..=7 => 2, - _ => 3, - }; - NexusConfig { - op: NexusOp::Preparing, - epoch, - last_committed_epoch, - coordinator, - members, - threshold, - commit_crash_tolerance, - prepared_members: BTreeSet::new(), - committed_members: BTreeSet::new(), - } - } - - pub fn to_reconfigure_msg(&self, rack_id: RackUuid) -> ReconfigureMsg { - ReconfigureMsg { - rack_id, - epoch: self.epoch, - last_committed_epoch: self.last_committed_epoch, - members: self.members.clone(), - threshold: self.threshold, - } - } - - // Are there enough prepared members to commit? 
- pub fn can_commit(&self) -> bool { - self.prepared_members.len() - >= (self.threshold.0 + self.commit_crash_tolerance) as usize - } -} - -impl IdOrdItem for NexusConfig { - type Key<'a> = Epoch; - - fn key(&self) -> Self::Key<'_> { - self.epoch - } - - id_upcast!(); -} - -/// A model of Nexus's view of the world during the test -pub struct NexusState { - // No reason to change the rack_id - pub rack_id: RackUuid, - - pub configs: IdOrdMap, -} - -impl NexusState { - pub fn new() -> NexusState { - NexusState { rack_id: RackUuid::new_v4(), configs: IdOrdMap::new() } - } - - // Create a `ReconfigureMsg` for the latest nexus config - pub fn reconfigure_msg_for_latest_config( - &self, - ) -> (&PlatformId, ReconfigureMsg) { - let config = self.configs.iter().last().expect("at least one config"); - (&config.coordinator, config.to_reconfigure_msg(self.rack_id)) - } - - /// Abort the latest reconfiguration attempt - pub fn abort_reconfiguration(&mut self) { - let config = self.configs.iter().last().expect("at least one config"); - // Can only abort while preparing - assert_eq!(config.op, NexusOp::Preparing); - } - - pub fn latest_config(&self) -> &NexusConfig { - self.configs.iter().last().expect("at least one config") - } - - pub fn latest_config_mut(&mut self) -> RefMut<'_, NexusConfig> { - self.configs.iter_mut().last().expect("at least one config") - } - - pub fn last_committed_config(&self) -> Option<&NexusConfig> { - // IdOrdMap doesn't allow reverse iteration. - // We therefore iterate through all configs to find the latest committed one. - // We could track this out of band but that leaves more room for error. - let mut found: Option<&NexusConfig> = None; - for c in &self.configs { - if c.op == NexusOp::Committed { - found = Some(c) - } - } - found - } -} - -/// Faults in our system. It's useful to keep these self contained and not -/// in separate fields in `TestState` so that we can access them all at once -/// independently of other `TestState` fields. -#[derive(Default)] -pub struct Faults { - // We allow nodes to crash and restart and therefore track crashed nodes here. - // - // A crashed node is implicitly disconnected from every other node. We don't - // bother storing the pairs in `disconnected_nodes`, but instead check both - // fields when necessary. - pub crashed_nodes: BTreeSet, - - /// The set of disconnected nodes - pub disconnected_nodes: DisconnectedNodes, -} - -impl Faults { - pub fn is_connected(&self, node1: PlatformId, node2: PlatformId) -> bool { - !self.crashed_nodes.contains(&node1) - && !self.crashed_nodes.contains(&node2) - && !self.disconnected_nodes.contains(node1, node2) - } -} - -/// For cardinality purposes, we assume all nodes are connected and explicitly -/// disconnect some of them. This allows us to track and compare much less data. 
-#[derive(Default)] -pub struct DisconnectedNodes { - // We sort each pair on insert for quick lookups - pairs: BTreeSet<(PlatformId, PlatformId)>, -} - -impl DisconnectedNodes { - // Return true if the pair is newly inserted - pub fn insert(&mut self, node1: PlatformId, node2: PlatformId) -> bool { - assert_ne!(node1, node2); - - let pair = if node1 < node2 { (node1, node2) } else { (node2, node1) }; - self.pairs.insert(pair) - } - - // Return true if the pair of nodes is disconnected, false otherwise - pub fn contains(&self, node1: PlatformId, node2: PlatformId) -> bool { - assert_ne!(node1, node2); - let pair = if node1 < node2 { (node1, node2) } else { (node2, node1) }; - self.pairs.contains(&pair) - } -} - -pub enum NexusReply { - CommitAck { from: PlatformId, epoch: Epoch }, -} /// The state of our test +#[derive(Clone, Diffable)] struct TestState { - /// A logger for our test - pub log: Logger, - - /// Our system under test - pub sut: Sut, - - /// All in flight messages between nodes - pub bootstrap_network: BTreeMap>, - - /// All in flight responses to nexus. We don't model the requests, as those - /// are `Node` public method calls. But we don't want to synchronously - /// update nexus state as a result of those calls, because that ruins any - /// possible interleaving with other actions. - /// - /// This is a way to allow interleaving of nexus replies without changing - /// the Node API to accept a separate set of Nexus messages and return - /// messages. We may decide that we want to do that, but for now we'll stick - /// with a concrete `Node` method based API that is "triggered" by nexus - /// messages. - pub underlay_network: Vec, - - /// A model of Nexus's view of the world during the test - pub nexus: NexusState, - - /// A cache of our member universe, so we only have to generate it once - pub member_universe: Vec, - - /// All possible system faults in our test - pub faults: Faults, - - /// All configurations ever generated by a coordinator. - /// - /// If an epoch got skipped due to a crashed coordinator then there will not - /// be a configuration for that epoch. - pub all_coordinated_configs: IdOrdMap, - - /// Expunged nodes cannot be added to a cluster. We never reuse nodes in - /// this test. We include nodes here that may not know yet that they have - /// been expunged in the `Sut`. - pub expunged: BTreeSet, + pub tq_state: TqState, /// Keep track of the number of generated `Action`s that get skipped /// @@ -287,29 +38,23 @@ struct TestState { impl TestState { pub fn new(log: Logger) -> TestState { - let sut = Sut::new(&log, member_universe()); - TestState { - log: log.new(o!("component" => "tq-proptest")), - sut, - bootstrap_network: BTreeMap::new(), - underlay_network: Vec::new(), - nexus: NexusState::new(), - member_universe: member_universe(), - faults: Faults::default(), - all_coordinated_configs: IdOrdMap::new(), - expunged: BTreeSet::new(), - skipped_actions: 0, - } + TestState { tq_state: TqState::new(log), skipped_actions: 0 } } - pub fn create_nexus_initial_config( - &mut self, + fn initial_config_event( + &self, config: GeneratedConfiguration, - ) { + down_nodes: BTreeSet, + ) -> Event { + // `tq_state` doesn't create the member universe until the first event is + // applied. We duplicate it here so we can create that initial config + // event. 
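+        //
+        // This duplication is safe on the assumption that `member_universe`
+        // is deterministic: both copies yield identical `PlatformId`s. A
+        // sketch of the assumed contract (serial numbers illustrative):
+        //
+        //     let a = trust_quorum_test_utils::member_universe(3);
+        //     let b = trust_quorum_test_utils::member_universe(3);
+        //     assert_eq!(a, b); // e.g. ("test", "0"), ("test", "1"), ...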
+ let member_universe = + trust_quorum_test_utils::member_universe(MEMBER_UNIVERSE_SIZE); let members: BTreeSet = config .members .iter() - .map(|index| self.member_universe[*index].clone()) + .map(|index| member_universe[*index].clone()) .collect(); let threshold = Threshold(usize::max( 2, @@ -319,135 +64,21 @@ impl TestState { let coordinator = members.first().cloned().expect("at least one member"); let last_committed_epoch = None; - let nexus_config = NexusConfig::new( + let config = NexusConfig::new( epoch, last_committed_epoch, coordinator, members, threshold, ); - self.nexus.configs.insert_unique(nexus_config).expect("new config"); - } - - pub fn setup_initial_connections(&mut self, down_nodes: BTreeSet) { - self.faults.crashed_nodes = down_nodes + let crashed_nodes = down_nodes .into_iter() - .map(|index| self.member_universe[index].clone()) + .map(|index| member_universe[index].clone()) .collect(); - - for (from, (node, ctx)) in self - .sut - .nodes - .iter_mut() - .filter(|(id, _)| !self.faults.crashed_nodes.contains(id)) - { - for to in self.member_universe.iter().filter(|id| { - !self.faults.crashed_nodes.contains(id) && from != *id - }) { - node.on_connect(ctx, to.clone()); - } - } - } - - /// Send the latest `ReconfigureMsg` from `Nexus` to the coordinator node - /// - /// If the node is not available, then abort the configuration at nexus - pub fn send_reconfigure_msg(&mut self) { - let (coordinator, msg) = self.nexus.reconfigure_msg_for_latest_config(); - let epoch_to_config = msg.epoch; - if self.faults.crashed_nodes.contains(coordinator) { - // We must abort the configuration. This mimics a timeout. - self.nexus.abort_reconfiguration(); - } else { - let (node, ctx) = self - .sut - .nodes - .get_mut(coordinator) - .expect("coordinator exists"); - - node.coordinate_reconfiguration(ctx, msg) - .expect("valid configuration"); - - // Do we have a `Configuration` for this epoch yet? - // - // For most reconfigurations, shares for the last committed - // configuration must be retrieved before the configuration is - // generated and saved in the persistent state. - let latest_persisted_config = - ctx.persistent_state().latest_config().expect("config exists"); - if latest_persisted_config.epoch == epoch_to_config { - // Save the configuration for later - self.all_coordinated_configs - .insert_unique(latest_persisted_config.clone()) - .expect("unique"); - } - } - } - - /// Check postcondition assertions after initial configuration - pub fn postcondition_initial_configuration( - &mut self, - ) -> Result<(), TestCaseError> { - let (coordinator, msg) = self.nexus.reconfigure_msg_for_latest_config(); - - // The coordinator should have received the `ReconfigureMsg` from Nexus - if !self.faults.crashed_nodes.contains(coordinator) { - let (node, ctx) = self - .sut - .nodes - .get_mut(coordinator) - .expect("coordinator exists"); - let mut connected_members = 0; - // The coordinator should start preparing by sending a `PrepareMsg` to all - // connected nodes in the membership set. - for member in - msg.members.iter().filter(|&id| id != coordinator).cloned() - { - if self.faults.is_connected(coordinator.clone(), member.clone()) - { - connected_members += 1; - let msg_found = ctx.envelopes().any(|envelope| { - envelope.to == member - && envelope.from == *coordinator - && matches!( - envelope.msg.kind, - PeerMsgKind::Prepare { .. 
} - ) - }); - prop_assert!(msg_found); - } - } - assert_eq!(connected_members, ctx.envelopes().count()); - - // The coordinator should be in the prepare phase - let cs = node.get_coordinator_state().expect("is coordinating"); - assert!(matches!(cs.op(), CoordinatorOperation::Prepare { .. })); - - // The persistent state should have changed - assert!(ctx.persistent_state_change_check_and_reset()); - assert!(ctx.persistent_state().has_prepared(msg.epoch)); - assert!(ctx.persistent_state().latest_committed_epoch().is_none()); - } - - Ok(()) - } - - /// Put any outgoing coordinator messages from the latest configuration on the wire - pub fn send_envelopes_from_coordinator(&mut self) { - let coordinator = { - let (coordinator, _) = - self.nexus.reconfigure_msg_for_latest_config(); - coordinator.clone() - }; - self.send_envelopes_from(&coordinator); - } - - pub fn send_envelopes_from(&mut self, id: &PlatformId) { - let (_, ctx) = self.sut.nodes.get_mut(id).expect("node exists"); - for envelope in ctx.drain_envelopes() { - let msgs = - self.bootstrap_network.entry(envelope.to.clone()).or_default(); - msgs.push(envelope); + Event::InitialSetup { + member_universe_size: MEMBER_UNIVERSE_SIZE, + config, + crashed_nodes, } } @@ -455,174 +86,107 @@ impl TestState { pub fn run_actions( &mut self, actions: Vec, + event_log: &mut EventLog, ) -> Result<(), TestCaseError> { for action in actions { - let skipped = match action { - Action::DeliverEnvelopes(indices) => { - self.action_deliver_envelopes(indices) - } - Action::PollPrepareAcks => self.action_poll_prepare_acks(), - Action::Commit(indices) => self.action_commit(indices), - Action::DeliverNexusReplies(n) => { - self.action_deliver_nexus_replies(n) - } - Action::Reconfigure { - num_added_nodes, - removed_nodes, - threshold, - coordinator, - } => self.action_reconfigure( - num_added_nodes, - removed_nodes, - threshold, - coordinator, - ), - }; - - if skipped { - self.skipped_actions += 1; - } else { + let events = self.action_to_events(action); + for event in &events { + event_log.record(event); + } + let check_invariants = !events.is_empty(); + for event in events { + self.tq_state.apply_event(event); + } + if check_invariants { self.check_invariants()?; + } else { + self.skipped_actions += 1; } } Ok(()) } - // Deliver network messages to generated destinations - fn action_deliver_envelopes(&mut self, indices: Vec) -> bool { - let destinations: Vec<_> = - self.bootstrap_network.keys().cloned().collect(); - if destinations.is_empty() { - // nothing to do - return true; - } - for index in indices { - let id = index.get(&destinations); - if let Some(envelope) = - self.bootstrap_network.get_mut(id).unwrap().pop() - { - let (node, ctx) = - self.sut.nodes.get_mut(id).expect("destination exists"); - node.handle(ctx, envelope.from, envelope.msg); - - // If this is the first time we've seen a configuration, track it - // - // We have to do this here because for reconfigurations, shares - // for the last committed reconfiguration are gathered before - // the config is created. We don't know exactly when config - // generation occurs, but know that it happens after envelopes - // are delivered, except for configurations that don't have - // a last committed config. This is normally the initial - // configuration, but can be later ones if the initial config - // is aborted. 
- if ctx.persistent_state_change_check_and_reset() { - if let Some(latest_config) = - ctx.persistent_state().latest_config() - { - if !self - .all_coordinated_configs - .contains_key(&latest_config.epoch) - { - // The coordinator must be the first node to create - // the configuration. - assert_eq!( - &latest_config.coordinator, - ctx.platform_id() - ); - - self.all_coordinated_configs - .insert_unique(latest_config.clone()) - .expect("unique config"); - } - } - } - - // Send any messages as a result of handling this message - send_envelopes(ctx, &mut self.bootstrap_network); + fn action_to_events(&self, action: Action) -> Vec { + match action { + Action::DeliverEnvelopes(indices) => { + self.action_to_events_deliver_envelopes(indices) + } + Action::PollPrepareAcks => { + self.action_to_events_poll_prepare_acks() } + Action::Commit(indices) => self.action_to_events_commit(indices), + Action::DeliverNexusReplies(n) => { + self.action_to_events_deliver_nexus_replies(n) + } + Action::Reconfigure { + num_added_nodes, + removed_nodes, + threshold, + coordinator, + } => self.action_to_events_reconfigure( + num_added_nodes, + removed_nodes, + threshold, + coordinator, + ), } - - // Remove any destinations with zero messages in-flight - self.bootstrap_network.retain(|_, msgs| !msgs.is_empty()); - - false } - // Call `Node::commit_reconfiguration` for nodes that have prepared and have - // not yet acked their commit. - fn action_commit(&mut self, indices: Vec) -> bool { - let rack_id = self.nexus.rack_id; - let latest_config = self.nexus.latest_config(); - if latest_config.op != NexusOp::Committed { - return true; - } - let committable: Vec<_> = latest_config - .prepared_members - .difference(&latest_config.committed_members) - .collect(); - - if committable.is_empty() { - // All members have committed - self.skipped_actions += 1; - return true; + fn action_to_events_deliver_envelopes( + &self, + indices: Vec, + ) -> Vec { + let mut events = vec![]; + let destinations: Vec<_> = + self.tq_state.bootstrap_network.keys().cloned().collect(); + if destinations.is_empty() { + // nothing to do + return events; } - // We shouldn't be calling commit twice or sending multiple replies - // to nexus, but a random bunch of indices might result in that. We - // therefore track nodes that have committed already. 
- let mut committed: BTreeSet = BTreeSet::new(); - + // Add an event only if there is actually an envelope to send + let mut counts = BTreeMap::new(); for index in indices { - let id = *index.get(&committable); - if committed.contains(id) { - continue; + let id = index.get(&destinations); + let count = counts.entry(id).or_insert(0usize); + *count += 1; + let num_envelopes = self + .tq_state + .bootstrap_network + .get(id) + .expect("destination exists") + .len(); + if *count <= num_envelopes { + events.push(Event::DeliverEnvelope { destination: id.clone() }); } - let (node, ctx) = - self.sut.nodes.get_mut(id).expect("destination exists"); - node.commit_configuration(ctx, rack_id, latest_config.epoch) - .expect("commit succeeded"); - committed.insert(id.clone()); - } - - let epoch = latest_config.epoch; - for from in committed { - self.underlay_network.push(NexusReply::CommitAck { from, epoch }); } - false - } - fn action_deliver_nexus_replies(&mut self, n: usize) -> bool { - let mut config = self.nexus.latest_config_mut(); - let n = usize::min(n, self.underlay_network.len()); - for reply in self.underlay_network.drain(0..n) { - match reply { - NexusReply::CommitAck { from, epoch } => { - if config.epoch == epoch { - config.committed_members.insert(from); - } - } - } - } - false + events } - /// Poll the coordinator for acks if nexus is preparing, and commit - /// if enough acks have been received. - fn action_poll_prepare_acks(&mut self) -> bool { - let mut latest_config = self.nexus.latest_config_mut(); + fn action_to_events_poll_prepare_acks(&self) -> Vec { + let mut events = vec![]; + let latest_config = self.tq_state.nexus.latest_config(); if latest_config.op != NexusOp::Preparing { // No point in checking. Commit or abort has occurred. - return true; + return events; } // If the coordinator has crashed then Nexus should abort. // Crashing is not actually implemented yet, but it will be. - if self.faults.crashed_nodes.contains(&latest_config.coordinator) { - latest_config.op = NexusOp::Aborted; + if self + .tq_state + .faults + .crashed_nodes + .contains(&latest_config.coordinator) + { + events.push(Event::AbortConfiguration(latest_config.epoch)); + return events; } // Lookup the coordinator node let (coordinator, ctx) = self + .tq_state .sut .nodes .get(&latest_config.coordinator) @@ -635,7 +199,7 @@ impl TestState { .latest_config() .map_or(Epoch(0), |c| c.epoch); if coordinator_epoch != latest_config.epoch { - return true; + return events; } // Poll the coordinator for acks. @@ -644,68 +208,66 @@ impl TestState { // crashed and nexus is still preparing. // // In a real system this request would go over the network, but would - // end up at the same place. It's not apparent that its worth the - // complexity here to delay poll replies to Nexus, but we can do that - // if necessary and then deliver them when the `DeliverNexusReplies` - // action fires. + // end up at the same place. 
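+        //
+        // Recall when Nexus can commit: it needs K + Z prepare acks, where
+        // K is the threshold and Z is the config's `commit_crash_tolerance`.
+        // Worked example from the initial config in the example event log:
+        // 13 members with K = 2 gives Z = 3, so Nexus can commit once 5
+        // members have acked their prepares.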
let cs = coordinator .get_coordinator_state() .expect("coordinator is coordinating"); - latest_config.prepared_members.extend(cs.op().acked_prepares()); - - // Commit if possible - if latest_config.can_commit() { - info!(self.log, "nexus committed"; - "epoch" => %latest_config.epoch, - "coordinator" => %latest_config.coordinator - ); - - latest_config.op = NexusOp::Committed; - - let new_members = latest_config.members.clone(); - let new_epoch = latest_config.epoch; + // Put the reply on the network + events.push(Event::SendNexusReplyOnUnderlay( + NexusReply::AckedPreparesFromCoordinator { + epoch: coordinator_epoch, + acks: cs.op().acked_prepares(), + }, + )); + events + } - // Expunge any removed nodes from the last committed configuration - if let Some(last_committed_epoch) = - latest_config.last_committed_epoch - { - // Release our mutable borrow - drop(latest_config); + fn action_to_events_commit(&self, indices: Vec) -> Vec { + let mut events = vec![]; + let latest_config = self.tq_state.nexus.latest_config(); + if latest_config.op != NexusOp::Committed { + return events; + } + let committable: Vec<_> = latest_config + .prepared_members + .difference(&latest_config.committed_members) + .collect(); - let last_committed_config = self - .nexus - .configs - .get(&last_committed_epoch) - .expect("config exists"); + if committable.is_empty() { + return events; + } - let expunged = last_committed_config - .members - .difference(&new_members) - .cloned(); + // De-duplicate the Index->PlatformId mapping + let mut nodes: BTreeSet = BTreeSet::new(); + for index in indices { + let id = *index.get(&committable); + nodes.insert(id.clone()); + } + for node in nodes { + events.push(Event::CommitConfiguration(node)); + } + events + } - for e in expunged { - info!( - self.log, - "expunged node"; - "epoch" => %new_epoch, - "platform_id" => %e); - self.expunged.insert(e); - } - } + fn action_to_events_deliver_nexus_replies(&self, n: usize) -> Vec { + let mut events = vec![]; + let n = usize::min(n, self.tq_state.underlay_network.len()); + for _ in 0..n { + events.push(Event::DeliverNexusReply); } - false + events } - fn action_reconfigure( - &mut self, + fn action_to_events_reconfigure( + &self, num_added_nodes: usize, removed_nodes: Vec, threshold: Index, coordinator: Selector, - ) -> bool { - let latest_epoch = self.nexus.latest_config().epoch; - let last_committed_config = self.nexus.last_committed_config(); + ) -> Vec { + let latest_epoch = self.tq_state.nexus.latest_config().epoch; + let last_committed_config = self.tq_state.nexus.last_committed_config(); // We must leave at least one node available to coordinate between the // new and old configurations. let (new_members, coordinator) = match last_committed_config { @@ -720,7 +282,7 @@ impl TestState { let num_nodes_to_add = usize::min( MEMBER_UNIVERSE_SIZE - c.members.len() - - self.expunged.len(), + - self.tq_state.expunged.len(), possible_num_nodes_to_add, ); @@ -737,7 +299,7 @@ impl TestState { // We can only start a reconfiguration if Nexus has an // acknowledgement that at least one node has seen the commit. if c.committed_members.is_empty() { - return true; + return vec![]; } let coordinator = coordinator.select(c.committed_members.iter()); @@ -762,11 +324,13 @@ impl TestState { // Just pick the first set of nodes in `member_universe` // that are not in the current membership and not expunged. 
let mut nodes_to_add = BTreeSet::new(); - for id in self.member_universe.iter() { + for id in self.tq_state.member_universe.iter() { if nodes_to_add.len() == num_nodes_to_add { break; } - if !self.expunged.contains(id) && !c.members.contains(id) { + if !self.tq_state.expunged.contains(id) + && !c.members.contains(id) + { nodes_to_add.insert(id.clone()); } } @@ -785,11 +349,12 @@ impl TestState { // We are generating a new config if num_added_nodes < MIN_CLUSTER_SIZE { // Nothing to do here. - return true; + return vec![]; } // Pick the first `num_added_nodes` from member_universe // It's as good a choice as any and deterministic let new_members: BTreeSet<_> = self + .tq_state .member_universe .iter() .take(num_added_nodes) @@ -819,9 +384,7 @@ impl TestState { new_members, threshold, ); - self.nexus.configs.insert_unique(nexus_config).expect("new config"); - self.send_reconfigure_msg(); - false + vec![Event::Reconfigure(nexus_config)] } /// At every point during the running of the test, invariants over the system @@ -845,8 +408,9 @@ impl TestState { fn invariant_all_nodes_have_same_configuration_per_epoch( &self, ) -> Result<(), TestCaseError> { - for (id, (_, ctx)) in &self.sut.nodes { + for (id, (_, ctx)) in &self.tq_state.sut.nodes { let diff = self + .tq_state .all_coordinated_configs .diff(&ctx.persistent_state().configs); // No new configs exist @@ -872,8 +436,9 @@ impl TestState { &self, ) -> Result<(), TestCaseError> { let (acked, epoch) = { - let latest_config = self.nexus.latest_config(); + let latest_config = self.tq_state.nexus.latest_config(); let (node, _) = self + .tq_state .sut .nodes .get(&latest_config.coordinator) @@ -900,7 +465,8 @@ impl TestState { // Make sure the coordinator actually is coordinating for this epoch for id in acked { - let (_, ctx) = self.sut.nodes.get(&id).expect("node exists"); + let (_, ctx) = + self.tq_state.sut.nodes.get(&id).expect("node exists"); prop_assert!(ctx.persistent_state().has_prepared(epoch)); } @@ -916,13 +482,14 @@ impl TestState { fn invariant_nodes_have_committed_if_nexus_has_acks( &self, ) -> Result<(), TestCaseError> { - let latest_config = self.nexus.latest_config(); + let latest_config = self.tq_state.nexus.latest_config(); if latest_config.op != NexusOp::Committed { return Ok(()); } for id in &latest_config.committed_members { - let (_, ctx) = self.sut.nodes.get(&id).expect("node exists"); + let (_, ctx) = + self.tq_state.sut.nodes.get(&id).expect("node exists"); let ps = ctx.persistent_state(); prop_assert!(ps.commits.contains(&latest_config.epoch)); prop_assert!(ps.has_prepared(latest_config.epoch)); @@ -943,7 +510,7 @@ impl TestState { fn invariant_nodes_not_coordinating_and_computing_key_share_simultaneously( &self, ) -> Result<(), TestCaseError> { - for (id, (node, _)) in &self.sut.nodes { + for (id, (node, _)) in &self.tq_state.sut.nodes { prop_assert!( !(node.get_coordinator_state().is_some() && node.is_computing_key_share()), @@ -957,7 +524,7 @@ impl TestState { // Ensure there has been no alarm at any node fn invariant_no_alarms(&self) -> Result<(), TestCaseError> { - for (id, (_, ctx)) in &self.sut.nodes { + for (id, (_, ctx)) in &self.tq_state.sut.nodes { let alarms = ctx.alarms(); prop_assert!( alarms.is_empty(), @@ -970,18 +537,6 @@ impl TestState { } } -/// Broken out of `TestState` to alleviate borrow checker woes -fn send_envelopes( - ctx: &mut NodeCtx, - bootstrap_network: &mut BTreeMap>, -) { - for envelope in ctx.drain_envelopes() { - let envelopes = - 
bootstrap_network.entry(envelope.to.clone()).or_default(); - envelopes.push(envelope); - } -} - // A high-level set of generated actions to drive the test forward. #[derive(Debug, Arbitrary)] #[allow(clippy::large_enum_variant)] @@ -1056,13 +611,7 @@ pub struct GeneratedConfiguration { /// still be duplicated due to the shift implementation used. Therefore we /// instead just choose from a constrained set of usize values that we can /// use directly as indexes into our fixed size structure for all tests. - /// - /// Note that we intentionally set the max set size to MAX_CLUSTER_SIZE-1. - /// This is because we always want to include the coordinator in the - /// configuration, but its value may not be chosen randomly. In this case, - /// we have to add it to the actual membership set we generate from this - /// configuration with [`TestState::generated_config_to_reconfigure_msg`]. - #[strategy(btree_set(0..=MEMBER_UNIVERSE_SIZE, MIN_CLUSTER_SIZE..MAX_CLUSTER_SIZE))] + #[strategy(btree_set(0..MEMBER_UNIVERSE_SIZE, MIN_CLUSTER_SIZE..MAX_CLUSTER_SIZE))] pub members: BTreeSet, /// An index is roughly equivalent to a threshold, since a threshold cannot @@ -1073,20 +622,13 @@ pub struct GeneratedConfiguration { pub threshold: Index, } -/// All possible members used in a test -fn member_universe() -> Vec { - (0..=MEMBER_UNIVERSE_SIZE) - .map(|serial| PlatformId::new("test".into(), serial.to_string())) - .collect() -} - #[derive(Debug, Arbitrary)] pub struct TestInput { initial_config: GeneratedConfiguration, // We choose a set of nodes to be crashed, resulting in them being // disconnected from every other node. - #[strategy(btree_set(0..=MEMBER_UNIVERSE_SIZE, 0..MAX_INITIAL_DOWN_NODES))] + #[strategy(btree_set(0..MEMBER_UNIVERSE_SIZE, 0..MAX_INITIAL_DOWN_NODES))] initial_down_nodes: BTreeSet, #[any(size_range(MIN_ACTIONS..MAX_ACTIONS).lift())] actions: Vec, @@ -1095,28 +637,28 @@ pub struct TestInput { #[proptest] fn test_trust_quorum_protocol(input: TestInput) { let logctx = test_setup_log("test_trust_quorum_protocol"); + let (parent_dir, prefix) = log_prefix_for_test(logctx.test_name()); + let event_log_path = parent_dir.join(format!("{prefix}-events.json")); + let mut event_log = EventLog::new(&event_log_path); - let mut state = TestState::new(logctx.log.clone()); + let log = logctx.log.new(o!("component" => "tq-proptest")); + let mut state = TestState::new(log.clone()); // Perform the initial setup - state.create_nexus_initial_config(input.initial_config); - state.setup_initial_connections(input.initial_down_nodes); - state.send_reconfigure_msg(); - - // Check the results of the initial setup - state.postcondition_initial_configuration()?; - - // Put the coordinator's outgoing messages on the wire if there are any - state.send_envelopes_from_coordinator(); + let event = state + .initial_config_event(input.initial_config, input.initial_down_nodes); + event_log.record(&event); + state.tq_state.apply_event(event); // Start executing the actions - state.run_actions(input.actions)?; + state.run_actions(input.actions, &mut event_log)?; info!( - state.log, + log, "Test complete"; "skipped_actions" => state.skipped_actions ); + let _ = std::fs::remove_file(event_log_path); logctx.cleanup_successful(); } diff --git a/trust-quorum/tqdb/Cargo.toml b/trust-quorum/tqdb/Cargo.toml new file mode 100644 index 0000000000..4436cc99fb --- /dev/null +++ b/trust-quorum/tqdb/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "tqdb" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true 
+ +[dependencies] +anyhow.workspace = true +camino.workspace = true +clap.workspace = true +colored.workspace = true +daft.workspace = true +iddqd.workspace = true +omicron-repl-utils.workspace = true +reedline.workspace = true +reconfigurator-cli.workspace = true +serde_json.workspace = true +slog.workspace = true +tabled.workspace = true +trust-quorum = { workspace = true, features = ["danger_partial_eq_ct_wrapper"] } +trust-quorum-test-utils.workspace = true + +omicron-workspace-hack.workspace = true + +[[bin]] +name = "tqdb" +path = "src/bin/tqdb/main.rs" diff --git a/trust-quorum/tqdb/example-event-logs/cluster-49df2a4b903c778a-test_trust_quorum_protocol.14368.453-events.json b/trust-quorum/tqdb/example-event-logs/cluster-49df2a4b903c778a-test_trust_quorum_protocol.14368.453-events.json new file mode 100644 index 0000000000..f518a84590 --- /dev/null +++ b/trust-quorum/tqdb/example-event-logs/cluster-49df2a4b903c778a-test_trust_quorum_protocol.14368.453-events.json @@ -0,0 +1,4668 @@ +[ +{ + "InitialSetup": { + "member_universe_size": 40, + "config": { + "op": "Preparing", + "epoch": 1, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "1" + }, + "members": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + }, + { + "part_number": "test", + "serial_number": "25" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "3" + }, + { + "part_number": "test", + "serial_number": "32" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "37" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + }, + { + "part_number": "test", + "serial_number": "7" + }, + { + "part_number": "test", + "serial_number": "9" + } + ], + "threshold": 2, + "commit_crash_tolerance": 3, + "prepared_members": [], + "committed_members": [] + }, + "crashed_nodes": [ + { + "part_number": "test", + "serial_number": "11" + }, + { + "part_number": "test", + "serial_number": "16" + }, + { + "part_number": "test", + "serial_number": "3" + }, + { + "part_number": "test", + "serial_number": "7" + } + ] + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "37" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "25" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "9" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "32" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "34" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "5" + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +{ + 
"SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "39" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "27" + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "4" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "15" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + }, + { + "part_number": "test", + "serial_number": "25" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "32" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "37" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + }, + { + "part_number": "test", + "serial_number": "9" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + 
}, + { + "part_number": "test", + "serial_number": "25" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "32" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "37" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + }, + { + "part_number": "test", + "serial_number": "9" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + }, + { + "part_number": "test", + "serial_number": "25" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "32" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "37" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + }, + { + "part_number": "test", + "serial_number": "9" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + }, + { + "part_number": "test", + "serial_number": "25" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "32" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "37" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + }, + { + "part_number": "test", + "serial_number": "9" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + }, + { + "part_number": "test", + "serial_number": "25" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "32" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "37" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + }, + { + "part_number": "test", + "serial_number": "9" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + }, + { + "part_number": "test", + "serial_number": "25" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "32" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "37" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + }, + { + 
"part_number": "test", + "serial_number": "9" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + }, + { + "part_number": "test", + "serial_number": "25" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "32" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "37" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + }, + { + "part_number": "test", + "serial_number": "9" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + }, + { + "part_number": "test", + "serial_number": "25" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "32" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "37" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + }, + { + "part_number": "test", + "serial_number": "9" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + }, + { + "part_number": "test", + "serial_number": "25" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "32" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "37" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + }, + { + "part_number": "test", + "serial_number": "9" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 1, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "15" + }, + { + "part_number": "test", + "serial_number": "25" + }, + { + "part_number": "test", + "serial_number": "27" + }, + { + "part_number": "test", + "serial_number": "32" + }, + { + "part_number": "test", + "serial_number": "34" + }, + { + "part_number": "test", + "serial_number": "37" + }, + { + "part_number": "test", + "serial_number": "39" + }, + { + "part_number": "test", + "serial_number": "4" + }, + { + "part_number": "test", + "serial_number": "5" + }, + { + "part_number": "test", + "serial_number": "9" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 2, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "1" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": 
"test", + "serial_number": "3" + } + ], + "threshold": 2, + "commit_crash_tolerance": 1, + "prepared_members": [], + "committed_members": [] + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + 
"acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 2, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 3, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "3" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 2, + "commit_crash_tolerance": 1, + "prepared_members": [], + "committed_members": [] + } +}, +{ + "AbortConfiguration": 3 +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "Reconfigure": { + "op": 
"Preparing", + "epoch": 4, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "3" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 2, + "commit_crash_tolerance": 1, + "prepared_members": [], + "committed_members": [] + } +}, +{ + "AbortConfiguration": 4 +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 5, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "2" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 3, + "commit_crash_tolerance": 0, + "prepared_members": [], + "committed_members": [] + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 5, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 5, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 5, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 5, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 6, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "2" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ], + "threshold": 2, + "commit_crash_tolerance": 0, + "prepared_members": [], + "committed_members": [] + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 6, + "acks": [ + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + 
"SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 6, + "acks": [ + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 6, + "acks": [ + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 6, + "acks": [ + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 6, + "acks": [ + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 6, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 6, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 6, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 6, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 6, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 6, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 7, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "2" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ], + "threshold": 2, + "commit_crash_tolerance": 0, + "prepared_members": [], + "committed_members": [] + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 8, + "last_committed_epoch": 
null, + "coordinator": { + "part_number": "test", + "serial_number": "0" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 2, + "commit_crash_tolerance": 1, + "prepared_members": [], + "committed_members": [] + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 8, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 8, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 8, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 8, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 8, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 8, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 8, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 8, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 9, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "2" + }, + "members": [ + { + "part_number": "test", + "serial_number": 
"0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 2, + "commit_crash_tolerance": 1, + "prepared_members": [], + "committed_members": [] + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 9, + "acks": [ + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 9, + "acks": [ + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 9, + "acks": [ + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 9, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 10, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "1" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 2, + "commit_crash_tolerance": 1, + "prepared_members": [], + "committed_members": [] + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + 
"AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": 
"test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + 
"epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 10, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 11, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "1" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 2, + "commit_crash_tolerance": 1, + "prepared_members": [], + "committed_members": [] + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 11, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 12, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "1" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 2, + "commit_crash_tolerance": 1, + "prepared_members": [], + "committed_members": [] + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 12, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 12, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 12, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 12, + "acks": [ + 
{ + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 12, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 12, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 12, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 13, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "0" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 2, + "commit_crash_tolerance": 1, + "prepared_members": [], + "committed_members": [] + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + 
"AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { 
+ "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 13, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 14, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "2" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 3, + "commit_crash_tolerance": 0, + "prepared_members": [], + "committed_members": [] + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 15, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "3" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + 
"serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 2, + "commit_crash_tolerance": 1, + "prepared_members": [], + "committed_members": [] + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "AbortConfiguration": 15 +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 16, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "2" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ], + "threshold": 2, + "commit_crash_tolerance": 0, + "prepared_members": [], + "committed_members": [] + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 17, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "1" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ], + "threshold": 2, + "commit_crash_tolerance": 0, + "prepared_members": [], + "committed_members": [] + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": [ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": 
[ + { + "part_number": "test", + "serial_number": "1" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 17, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "Reconfigure": { + "op": "Preparing", + "epoch": 18, + "last_committed_epoch": null, + "coordinator": { + "part_number": "test", + "serial_number": "2" + }, + "members": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + }, + { + "part_number": "test", + "serial_number": "3" + } + ], + "threshold": 2, + "commit_crash_tolerance": 1, + "prepared_members": [], + "committed_members": [] + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "0" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "1" + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +{ + "DeliverEnvelope": { + "destination": { + "part_number": "test", + "serial_number": "2" + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + 
"serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply", +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +{ + "SendNexusReplyOnUnderlay": { + "AckedPreparesFromCoordinator": { + "epoch": 18, + "acks": [ + { + "part_number": "test", + "serial_number": "0" + }, + { + "part_number": "test", + "serial_number": "1" + }, + { + "part_number": "test", + "serial_number": "2" + } + ] + } + } +}, +"DeliverNexusReply", +"DeliverNexusReply" +] diff --git a/trust-quorum/tqdb/src/bin/tqdb/main.rs b/trust-quorum/tqdb/src/bin/tqdb/main.rs new file mode 100644 index 0000000000..b7e44e590f --- /dev/null +++ b/trust-quorum/tqdb/src/bin/tqdb/main.rs @@ -0,0 +1,716 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The Trust Quorum Debugger +//! +//! Capable of executing and stepping through event streams generated by +//! trust quorum proptests. 
+
+use anyhow::{Context, bail};
+use camino::Utf8PathBuf;
+use clap::{Args, Parser, Subcommand};
+use daft::Diffable;
+use omicron_repl_utils::run_repl_on_stdin_customized;
+use reconfigurator_cli::LogCapture;
+use reedline::{
+    ColumnarMenu, DefaultCompleter, DefaultPrompt, DefaultPromptSegment, Emacs,
+    FileBackedHistory, KeyCode, KeyModifiers, MenuBuilder, Reedline,
+    ReedlineEvent, default_emacs_keybindings,
+};
+use slog::{Logger, o};
+use std::collections::{BTreeMap, BTreeSet};
+use std::fmt::Write;
+use std::fs;
+use std::io::IsTerminal;
+use tabled::Tabled;
+use trust_quorum::PlatformId;
+use trust_quorum_test_utils::{Event, TqState};
+
+fn main() -> Result<(), anyhow::Error> {
+    let repl = TqdbRepl {};
+    repl.exec()
+}
+
+/// Internal debugger state
+pub struct Tqdb {
+    event_log_path: Option<Utf8PathBuf>,
+
+    events: Vec<Event>,
+
+    // Current state of the trust-quorum
+    current_state: TqState,
+
+    // Index of the next event to be applied
+    next_event: usize,
+
+    // All set breakpoints, keyed by event log index
+    breakpoints: BTreeSet<usize>,
+
+    // All snapshots ever taken.
+    //
+    // Snapshots are taken automatically when a pending snapshot request is
+    // reached.
+    snapshots: BTreeMap<usize, TqState>,
+
+    // Snapshot requests for events that haven't yet been applied
+    pending_snapshots: BTreeSet<usize>,
+}
+
+impl Tqdb {
+    pub fn new(log: &Logger) -> Self {
+        let log = log.new(o!("component" => "tqdb"));
+        Tqdb {
+            event_log_path: None,
+            events: vec![],
+            current_state: TqState::new(log),
+            next_event: 0,
+            breakpoints: BTreeSet::new(),
+            snapshots: BTreeMap::new(),
+            pending_snapshots: BTreeSet::new(),
+        }
+    }
+
+    pub fn reset_state(&mut self) {
+        let Tqdb {
+            event_log_path,
+            events,
+            current_state,
+            next_event,
+            breakpoints,
+            snapshots,
+            pending_snapshots,
+        } = self;
+        let log = current_state.log.clone();
+
+        *event_log_path = None;
+        *events = vec![];
+        *current_state = TqState::new(log);
+        *next_event = 0;
+        *breakpoints = BTreeSet::new();
+        *snapshots = BTreeMap::new();
+        *pending_snapshots = BTreeSet::new();
+    }
+
+    pub fn toggle_breakpoint(&mut self, index: usize) -> anyhow::Result<bool> {
+        if index >= self.events.len() {
+            bail!(
+                "Invalid event index: {index}. Only {} total events.",
+                self.events.len()
+            );
+        }
+        if !self.breakpoints.remove(&index) {
+            let _ = self.breakpoints.insert(index);
+            Ok(true)
+        } else {
+            Ok(false)
+        }
+    }
+
+    // Reset the state to the beginning of time
+    //
+    // Don't remove any breakpoints, snapshots, or pending snapshots
+    pub fn rewind(&mut self) {
+        let Tqdb {
+            event_log_path: _,
+            events: _,
+            current_state,
+            next_event,
+            breakpoints: _,
+            snapshots: _,
+            pending_snapshots: _,
+        } = self;
+
+        *current_state = TqState::new(current_state.log.clone());
+        *next_event = 0;
+    }
+
+    pub fn breakpoints(&self) -> &BTreeSet<usize> {
+        &self.breakpoints
+    }
+
+    pub fn maybe_snapshot(&mut self) {
+        if self.next_event == 0 {
+            return;
+        }
+        let curr_event = self.next_event - 1;
+        if self.pending_snapshots.remove(&curr_event) {
+            self.snapshots.insert(curr_event, self.current_state.clone());
+        }
+    }
+}
+
+/// Interactive REPL for our trust quorum debugger
+pub struct TqdbRepl {}
+
+impl TqdbRepl {
+    /// Execute the command.
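+    ///
+    /// A sketch of the setup below: a `Reedline` editor is configured with
+    /// tab completion over the command list, Emacs keybindings, and
+    /// file-backed history, then handed to `run_repl_on_stdin_customized`,
+    /// which parses each input line into a [`TopLevelArgs`] and dispatches
+    /// it to `process_command`.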
+    pub fn exec(self) -> anyhow::Result<()> {
+        let (log_capture, log) =
+            LogCapture::new(std::io::stdout().is_terminal());
+
+        let mut tqdb = Tqdb::new(&log);
+
+        let mut completer = Box::new(DefaultCompleter::with_inclusions(&['-']));
+        completer.insert(Self::commands());
+        let completion_menu =
+            Box::new(ColumnarMenu::default().with_name("commands"));
+        let mut keybindings = default_emacs_keybindings();
+        keybindings.add_binding(
+            KeyModifiers::NONE,
+            KeyCode::Tab,
+            ReedlineEvent::UntilFound(vec![
+                ReedlineEvent::Menu("commands".to_string()),
+                ReedlineEvent::MenuNext,
+            ]),
+        );
+        let edit_mode = Box::new(Emacs::new(keybindings));
+
+        let history = Box::new(
+            FileBackedHistory::with_file(
+                10000,
+                "/tmp/.tqdb-history.txt".into(),
+            )
+            .expect("Error configuring history with file"),
+        );
+
+        let ed = Reedline::create()
+            .with_history(history)
+            .with_completer(completer)
+            .with_menu(reedline::ReedlineMenu::EngineCompleter(completion_menu))
+            .with_edit_mode(edit_mode);
+
+        let prompt = DefaultPrompt::new(
+            DefaultPromptSegment::Basic("tqdb".into()),
+            DefaultPromptSegment::Empty,
+        );
+
+        run_repl_on_stdin_customized(ed, &prompt, &mut |cmd: TopLevelArgs| {
+            process_command(&mut tqdb, cmd, &log_capture)
+        })
+    }
+
+    // Update this with each new subcommand
+    fn commands() -> Vec<String> {
+        // This assignment and match exist solely to ensure we update our list
+        // when we add or remove a command.
+        let c = Commands::Run;
+        match c {
+            Commands::Open { .. }
+            | Commands::Run
+            | Commands::Step { .. }
+            | Commands::BreakpointToggle { .. }
+            | Commands::BreakpointList
+            | Commands::Snapshot { .. }
+            | Commands::SnapshotList
+            | Commands::SnapshotListPending
+            | Commands::SnapshotShow { .. }
+            | Commands::Diff { .. }
+            | Commands::NodeShow { .. }
+            | Commands::Rewind
+            | Commands::Events(_)
+            | Commands::Show
+            | Commands::Summary => {}
+        }
+
+        [
+            "open",
+            "run",
+            "step",
+            "breakpoint-toggle",
+            "breakpoint-list",
+            "snapshot",
+            "snapshot-list",
+            "snapshot-list-pending",
+            "snapshot-show",
+            "diff",
+            "node-show",
+            "rewind",
+            "events",
+            "show",
+            "summary",
+        ]
+        .into_iter()
+        .map(Into::into)
+        .collect()
+    }
+}
+
+/// Arguments for our debugger REPL
+#[derive(Parser, Debug)]
+struct TopLevelArgs {
+    #[command(subcommand)]
+    command: Commands,
+}
+
+#[derive(Debug, Subcommand)]
+enum Commands {
+    /// open an event log file
+    #[clap(alias = "o")]
+    Open {
+        /// path to the event log file
+        path: Utf8PathBuf,
+    },
+    /// apply all events until completion or a breakpoint
+    #[clap(alias = "r")]
+    Run,
+    /// step over n events by applying them
+    ///
+    /// This command steps over breakpoints. Use `run` if you want to stop at
+    /// breakpoints.
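+    ///
+    /// For example, `step 3` applies the next three events, while a bare
+    /// `step` applies a single event.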
+ #[clap(alias = "s")] + Step { + /// number of events to apply, 1 if not given + num_events: Option, + }, + + /// toggle a breakpoint at a given event + #[clap(alias = "b")] + BreakpointToggle { + /// index of the event in the log to apply the breakpoint to + index: usize, + }, + /// display all existing breakpoints + BreakpointList, + + /// take a snapshot of the current state, or at the given event when reached + Snapshot { + /// index of the event to take snapshot + index: Option, + }, + /// display all existing snapshots + SnapshotList, + /// show a snapshot for the given event + SnapshotShow { + /// index of the event where the snapshot was taken + index: usize, + }, + /// list all pending snapshots + SnapshotListPending, + + /// show the difference between the current state and the snapshot + /// or two snapshots if two indexes are given + #[clap(alias = "d")] + Diff { + /// The event log index of where the snapshot was taken + snapshot1: usize, + + /// An optional second snapshot index + snapshot2: Option, + }, + + /// display the current state of a SUT node and its context + NodeShow { + /// The serial number of the node to print. + /// Print all state if not present. + serial: usize, + }, + + /// Reset the state to the beginning of the trace + /// + /// This does not remove breakpoints or pending snapshots + Rewind, + + /// show the full state of the system + Show, + + /// print an overview of the current state of the system + Summary, + + /// display log entries - next entry by default + Events(EventsArgs), +} + +#[derive(Debug, Args)] +struct EventsArgs { + #[clap(subcommand)] + command: Option, +} + +#[derive(Debug, Subcommand)] +pub enum EventsCommand { + All, + Next { num_events: usize }, + Range { start: usize, end: usize }, +} + +/// Processes one "line" of user input. 
+fn process_command( + tqdb: &mut Tqdb, + cmd: TopLevelArgs, + logs: &LogCapture, +) -> anyhow::Result> { + let TopLevelArgs { command } = cmd; + let cmd_result = match command { + Commands::Open { path } => cmd_open(tqdb, path), + Commands::Run {} => cmd_run(tqdb), + Commands::Step { num_events } => cmd_step(tqdb, num_events), + Commands::BreakpointToggle { index } => { + cmd_toggle_breakpoint(tqdb, index) + } + Commands::BreakpointList {} => cmd_breakpoint_list(tqdb), + Commands::Diff { snapshot1, snapshot2 } => { + cmd_diff(tqdb, snapshot1, snapshot2) + } + Commands::Snapshot { index } => cmd_snapshot(tqdb, index), + Commands::SnapshotList {} => cmd_snapshot_list(tqdb), + Commands::SnapshotListPending => cmd_snapshot_list_pending(tqdb), + Commands::SnapshotShow { index } => cmd_snapshot_show(tqdb, index), + Commands::NodeShow { serial } => cmd_node_show(tqdb, serial), + Commands::Rewind => cmd_rewind(tqdb), + Commands::Show => cmd_show(tqdb), + Commands::Events(args) => cmd_log_show(tqdb, args), + Commands::Summary {} => cmd_summary(tqdb), + }; + + for line in logs.take_log_lines() { + println!("{line}"); + } + + cmd_result +} + +/// Open an event log file for debugging +fn cmd_open( + tqdb: &mut Tqdb, + path: Utf8PathBuf, +) -> anyhow::Result> { + tqdb.reset_state(); + let json = fs::read_to_string(&path).context(path.clone())?; + let events: Vec = serde_json::from_str(&json) + .context("failed to deserialize event log")?; + tqdb.event_log_path = Some(path.clone()); + tqdb.events = events; + Ok(Some(format!("loaded event log: {path}\n{} events.", tqdb.events.len()))) +} + +/// Apply all events until completion or a breakpoint +fn cmd_run(tqdb: &mut Tqdb) -> anyhow::Result> { + if tqdb.event_log_path.is_none() { + bail!("please open an event log file"); + } + + let mut num_events = 0; + if tqdb.next_event < tqdb.events.len() { + let end = tqdb + .breakpoints + .iter() + .cloned() + .find(|&i| i > tqdb.next_event) + .unwrap_or(tqdb.events.len()); + let events: Vec<_> = tqdb.events[tqdb.next_event..end].to_vec(); + for event in events { + tqdb.current_state.apply_event(event); + num_events += 1; + tqdb.next_event += 1; + tqdb.maybe_snapshot(); + } + } + + let output = if tqdb.next_event == tqdb.events.len() { + format!("done: applied {} events", num_events) + } else { + format!( + "stopped at breakpoint {} after applying {} events", + tqdb.next_event, num_events + ) + }; + Ok(Some(output)) +} + +// Step through each event by applying them +fn cmd_step( + tqdb: &mut Tqdb, + num_events: Option, +) -> anyhow::Result> { + if tqdb.event_log_path.is_none() { + bail!("please open an event log file"); + } + + let num_events = num_events.unwrap_or(1); + + let end = tqdb.next_event + num_events; + if end > tqdb.events.len() { + bail!("Number of events to step exceeds remaining events"); + } + + let mut s = String::new(); + let mut applied_events = 0; + let events: Vec<_> = tqdb.events[tqdb.next_event..end].to_vec(); + for event in events { + writeln!(&mut s, "{} {event:#?}", tqdb.next_event)?; + tqdb.current_state.apply_event(event.clone()); + applied_events += 1; + tqdb.next_event += 1; + tqdb.maybe_snapshot(); + } + writeln!(&mut s, "done: applied {} events", applied_events)?; + Ok(Some(s)) +} + +fn cmd_toggle_breakpoint( + tqdb: &mut Tqdb, + index: usize, +) -> anyhow::Result> { + let output = if tqdb.toggle_breakpoint(index)? 
{ + format!("breakpoint set at event {index}") + } else { + format!("breakpoint removed at event {index}") + }; + Ok(Some(output)) +} + +fn cmd_breakpoint_list(tqdb: &mut Tqdb) -> anyhow::Result> { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct Breakpoint { + index: usize, + event: String, + } + + let rows = tqdb.breakpoints.iter().map(|i| Breakpoint { + index: *i, + event: format!("{:#?}", tqdb.events[*i]), + }); + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + Ok(Some(table)) +} + +fn cmd_snapshot_list(tqdb: &mut Tqdb) -> anyhow::Result> { + let output = if tqdb.snapshots.is_empty() { + "no snapshots present".to_string() + } else { + let mut s = String::new(); + writeln!(&mut s, "Snapshot indexes: ")?; + for i in tqdb.snapshots.keys() { + writeln!(&mut s, "{i} ")?; + } + s + }; + Ok(Some(output)) +} + +fn cmd_snapshot_list_pending( + tqdb: &mut Tqdb, +) -> anyhow::Result> { + let output = if tqdb.pending_snapshots.is_empty() { + "no snapshots pending".to_string() + } else { + let mut s = String::new(); + writeln!(&mut s, "pending snapshot indexes: ")?; + for i in &tqdb.pending_snapshots { + writeln!(&mut s, "{i} ")?; + } + s + }; + Ok(Some(output)) +} + +fn cmd_snapshot( + tqdb: &mut Tqdb, + index: Option, +) -> anyhow::Result> { + if tqdb.event_log_path.is_none() { + bail!("please open an event log file"); + } + + if tqdb.next_event == 0 && index.is_none() { + bail!("please apply an event to generate a useful state"); + } + + let output = if let Some(index) = index { + if index < tqdb.next_event { + tqdb.pending_snapshots.insert(index); + "Setting pending snapshot.\n + Already applied event however. + Use 'rewind' to start over." + .to_string() + } else if index > tqdb.events.len() { + bail!( + "index out of bounds. 
+
+/// Take a snapshot of the current state, or queue one at a given event index
+fn cmd_snapshot(
+    tqdb: &mut Tqdb,
+    index: Option<usize>,
+) -> anyhow::Result<Option<String>> {
+    if tqdb.event_log_path.is_none() {
+        bail!("please open an event log file");
+    }
+
+    if tqdb.next_event == 0 && index.is_none() {
+        bail!("please apply an event to generate a useful state");
+    }
+
+    let output = if let Some(index) = index {
+        if index < tqdb.next_event {
+            tqdb.pending_snapshots.insert(index);
+            "Setting pending snapshot.\n\
+             Already applied that event, however.\n\
+             Use 'rewind' to start over."
+                .to_string()
+        } else if index >= tqdb.events.len() {
+            bail!(
+                "index out of bounds. Only {} total events.",
+                tqdb.events.len()
+            );
+        } else {
+            tqdb.pending_snapshots.insert(index);
+            "Setting pending snapshot".to_string()
+        }
+    } else {
+        tqdb.snapshots.insert(
+            tqdb.next_event.checked_sub(1).unwrap(),
+            tqdb.current_state.clone(),
+        );
+        "Taking snapshot at current state".to_string()
+    };
+
+    Ok(Some(output))
+}
+
+/// Show the snapshot taken at the given event index
+fn cmd_snapshot_show(
+    tqdb: &mut Tqdb,
+    index: usize,
+) -> anyhow::Result<Option<String>> {
+    match tqdb.snapshots.get(&index) {
+        Some(s) => Ok(Some(format!("{s:#?}"))),
+        None => bail!("no such snapshot"),
+    }
+}
+
+/// Diff two snapshots, or a snapshot against the current state
+fn cmd_diff(
+    tqdb: &mut Tqdb,
+    snapshot1: usize,
+    snapshot2: Option<usize>,
+) -> anyhow::Result<Option<String>> {
+    if tqdb.event_log_path.is_none() {
+        bail!("please open an event log file");
+    }
+
+    if snapshot2.is_none() && tqdb.next_event == 0 {
+        bail!("please apply an event to get a useful state to diff with");
+    }
+
+    let Some(s1) = tqdb.snapshots.get(&snapshot1) else {
+        bail!("snapshot at {snapshot1} doesn't exist");
+    };
+    let diff = match snapshot2 {
+        Some(snapshot2) => {
+            let Some(s2) = tqdb.snapshots.get(&snapshot2) else {
+                bail!("snapshot at {snapshot2} doesn't exist");
+            };
+            if snapshot1 < snapshot2 { s1.diff(s2) } else { s2.diff(s1) }
+        }
+        None => {
+            if snapshot1 < tqdb.next_event {
+                s1.diff(&tqdb.current_state)
+            } else {
+                tqdb.current_state.diff(s1)
+            }
+        }
+    };
+    Ok(Some(format!("{diff}")))
+}
+
+/// Show the current state of the system under test
+fn cmd_show(tqdb: &Tqdb) -> anyhow::Result<Option<String>> {
+    if tqdb.event_log_path.is_none() {
+        bail!("please open an event log file");
+    }
+    Ok(Some(format!("{:#?}", tqdb.current_state)))
+}
+
+/// Show a node under test and its context, keyed by serial number
+fn cmd_node_show(
+    tqdb: &mut Tqdb,
+    serial: usize,
+) -> anyhow::Result<Option<String>> {
+    let id = PlatformId::new("test".into(), serial.to_string());
+    let Some((node, ctx)) = tqdb.current_state.sut.nodes.get(&id) else {
+        bail!("failed to load node: {id}");
+    };
+
+    Ok(Some(format!("{node:#?}\n{ctx:#?}")))
+}
+
+/// Reset the state and start applying events from the beginning
+fn cmd_rewind(tqdb: &mut Tqdb) -> anyhow::Result<Option<String>> {
+    tqdb.rewind();
+
+    let mut s = String::new();
+    writeln!(&mut s, "Re-initialized state and set next-event to 0")?;
+    writeln!(&mut s, "Breakpoints, snapshots, and pending snapshots remain")?;
+
+    Ok(Some(s))
+}
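// ---------------------------------------------------------------------------
// Editor's note: `cmd_diff` above always orders its arguments so that the
// diff reads from the older state to the newer one. Assuming `TqState`
// implements daft's `Diffable` trait (daft is among this PR's dependencies),
// `a.diff(&b)` describes the changes going from `a` to `b`:
//
//     let earlier = tqdb.snapshots.get(&5).unwrap();
//     let later = tqdb.snapshots.get(&20).unwrap();
//     // The changes accumulated between event 5 and event 20.
//     let diff = earlier.diff(later);
// ---------------------------------------------------------------------------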
+
+/// Show events from the loaded event log
+fn cmd_log_show(
+    tqdb: &Tqdb,
+    args: EventsArgs,
+) -> anyhow::Result<Option<String>> {
+    if tqdb.events.is_empty() {
+        bail!("no events loaded. Please call 'open' on a valid file");
+    }
+
+    // Find the possible start and end range of events
+    let (start, end) = match args.command {
+        Some(EventsCommand::All) => (0, tqdb.events.len()),
+        Some(EventsCommand::Next { num_events }) => {
+            (tqdb.next_event, tqdb.next_event + num_events)
+        }
+        Some(EventsCommand::Range { start, end }) => (start, end),
+        None => (tqdb.next_event, tqdb.next_event + 1),
+    };
+
+    let mut s = String::new();
+    if start == tqdb.events.len() {
+        writeln!(&mut s, "finished applying events")?;
+    } else {
+        let end = usize::min(end, tqdb.events.len());
+        for i in start..end {
+            writeln!(&mut s, "{i} {:#?}", tqdb.events[i])?;
+        }
+    }
+
+    Ok(Some(s))
+}
+
+/// Summarize the loaded event log and the current state
+fn cmd_summary(tqdb: &mut Tqdb) -> anyhow::Result<Option<String>> {
+    let mut s = String::new();
+    if let Some(path) = &tqdb.event_log_path {
+        writeln!(&mut s, "event log path: {:?}", path)?;
+        writeln!(&mut s, "total events in log: {}", tqdb.events.len())?;
+    } else {
+        bail!("no event log loaded. Please call 'open'");
+    }
+    if tqdb.next_event != tqdb.events.len() {
+        writeln!(&mut s, "next event to apply: {}", tqdb.next_event)?;
+        writeln!(&mut s, " {:#?}", tqdb.events[tqdb.next_event])?;
+    } else {
+        writeln!(&mut s, "finished applying events")?;
+    }
+
+    writeln!(
+        &mut s,
+        "total nodes under test: {}",
+        tqdb.current_state.sut.nodes.len()
+    )?;
+    let total_bootstrap_msgs = tqdb
+        .current_state
+        .bootstrap_network
+        .iter()
+        .fold(0, |acc, (_, e)| acc + e.len());
+    writeln!(
+        &mut s,
+        "bootstrap network messages in flight: {}",
+        total_bootstrap_msgs
+    )?;
+
+    if tqdb.next_event > 0 {
+        let latest_config = tqdb.current_state.nexus.latest_config();
+        writeln!(&mut s, "nexus config:")?;
+        writeln!(&mut s, " epoch: {}", latest_config.epoch)?;
+        writeln!(&mut s, " op: {:?}", latest_config.op)?;
+        writeln!(
+            &mut s,
+            " coordinator: {}",
+            latest_config.coordinator.serial_number()
+        )?;
+        writeln!(&mut s, " total members: {}", latest_config.members.len())?;
+        writeln!(
+            &mut s,
+            " prepared members: {}",
+            latest_config.prepared_members.len()
+        )?;
+        writeln!(
+            &mut s,
+            " committed members: {}",
+            latest_config.committed_members.len()
+        )?;
+        writeln!(&mut s, " threshold: {}", latest_config.threshold.0)?;
+        writeln!(
+            &mut s,
+            " commit crash tolerance: {}",
+            latest_config.commit_crash_tolerance
+        )?;
+    }
+
+    Ok(Some(s))
+}
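// ---------------------------------------------------------------------------
// Editor's note: the `Tqdb` struct itself is defined elsewhere in this PR.
// From the field accesses in the commands above, its shape is roughly the
// following. This is a sketch inferred from usage, not the actual
// definition; ordered collections are assumed because `run` scans
// breakpoints in index order and `snapshot-list` prints keys in order:
//
//     pub struct Tqdb {
//         event_log_path: Option<Utf8PathBuf>,
//         events: Vec<Event>,
//         next_event: usize,
//         breakpoints: BTreeSet<usize>,
//         pending_snapshots: BTreeSet<usize>,
//         snapshots: BTreeMap<usize, TqState>,
//         current_state: TqState,
//     }
// ---------------------------------------------------------------------------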