Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 31 additions & 1 deletion bootstore/src/schemes/v0/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
//! 2. A network config blob required for pre-rack-unlock configuration
//!

use crate::schemes::v0::SharePkgCommon;

use super::{Fsm, FsmConfig, State};
use camino::Utf8PathBuf;
use omicron_common::ledger::{Ledger, Ledgerable};
use serde::{Deserialize, Serialize};
use sled_hardware_types::Baseboard;
use slog::{Logger, info};
use slog::{Logger, info, warn};

/// A persistent version of `Fsm::State`
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
Expand Down Expand Up @@ -84,6 +86,34 @@ impl PersistentFsmState {
(Fsm::new_uninitialized(node_id, config), 0)
}
}

/// Read the persisted LRTQ FSM state from the ledger at `paths` and return
/// the common part of the share package it holds, for use by trust quorum.
///
/// Returns `None` if `Ledger::new` yields no ledger for `paths`, or if the
/// persisted state is `Uninitialized` or `Learning`; in those two states
/// there is no share package to return, and a warning is logged.
pub async fn load_for_trust_quorum_upgrade(
    log: &Logger,
    paths: Vec<Utf8PathBuf>,
) -> Option<SharePkgCommon> {
    let loaded = Ledger::<PersistentFsmState>::new(log, paths).await?;
    let persisted = loaded.into_inner();

    info!(
        log,
        "Loaded LRTQ PersistentFsmState from ledger in state {} with generation {}",
        persisted.state.name(),
        persisted.generation
    );

    match persisted.state {
        // Both share-bearing states carry a package with a `common` part.
        State::InitialMember { pkg, .. } => Some(pkg.common.clone()),
        State::Learned { pkg } => Some(pkg.common.clone()),
        // Neither of these states holds a share package.
        State::Uninitialized | State::Learning => {
            warn!(
                log,
                "Unexpected LRTQ state: {}. No share available.",
                persisted.state.name()
            );
            None
        }
    }
}
}

/// Network configuration required to bring up the control plane
Expand Down
1 change: 1 addition & 0 deletions clients/nexus-lockstep-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ progenitor::generate_api!(
ReconfiguratorConfigView = nexus_types::deployment::ReconfiguratorConfigView,
RecoverySiloConfig = sled_agent_types_versions::latest::rack_init::RecoverySiloConfig,
SledAgentUpdateStatus = nexus_types::internal_api::views::SledAgentUpdateStatus,
TrustQuorumConfig = nexus_types::trust_quorum::TrustQuorumConfig,
UpdateStatus = nexus_types::internal_api::views::UpdateStatus,
ZoneStatus = nexus_types::internal_api::views::ZoneStatus,
ZpoolName = omicron_common::zpool_name::ZpoolName,
Expand Down
1 change: 1 addition & 0 deletions clients/sled-agent-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ progenitor::generate_api!(
Inventory = sled_agent_types_versions::latest::inventory::Inventory,
InventoryDisk = sled_agent_types_versions::latest::inventory::InventoryDisk,
InventoryZpool = sled_agent_types_versions::latest::inventory::InventoryZpool,
LrtqUpgradeMsg = trust_quorum_types::messages::LrtqUpgradeMsg,
MacAddr = omicron_common::api::external::MacAddr,
MupdateOverrideBootInventory = sled_agent_types_versions::latest::inventory::MupdateOverrideBootInventory,
Name = omicron_common::api::external::Name,
Expand Down
1 change: 1 addition & 0 deletions dev-tools/omdb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ supports-color.workspace = true
tabled.workspace = true
textwrap.workspace = true
tokio = { workspace = true, features = ["full"] }
trust-quorum-types.workspace = true
tufaceous-artifact.workspace = true
unicode-width.workspace = true
update-engine.workspace = true
Expand Down
100 changes: 100 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ use omicron_uuid_kinds::DemoSagaUuid;
use omicron_uuid_kinds::GenericUuid;
use omicron_uuid_kinds::ParseError;
use omicron_uuid_kinds::PhysicalDiskUuid;
use omicron_uuid_kinds::RackUuid;
use omicron_uuid_kinds::SledUuid;
use omicron_uuid_kinds::SupportBundleUuid;
use quiesce::QuiesceArgs;
Expand All @@ -97,6 +98,7 @@ use slog_error_chain::InlineErrorChain;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::fs::OpenOptions;
use std::num::ParseIntError;
use std::os::unix::fs::PermissionsExt;
use std::str::FromStr;
use std::sync::Arc;
Expand All @@ -108,6 +110,7 @@ use tabled::settings::Padding;
use tabled::settings::object::Columns;
use tokio::io::AsyncWriteExt;
use tokio::sync::OnceCell;
use trust_quorum_types::types::Epoch;
use update_engine::EventBuffer;
use update_engine::ExecutionStatus;
use update_engine::ExecutionTerminalInfo;
Expand Down Expand Up @@ -165,6 +168,8 @@ enum NexusCommands {
/// interact with support bundles
#[command(visible_alias = "sb")]
SupportBundles(SupportBundleArgs),
/// interact with the trust quorum
TrustQuorum(TrustQuorumArgs),
/// show running artifact versions
UpdateStatus(UpdateStatusArgs),
}
Expand Down Expand Up @@ -566,6 +571,43 @@ enum SupportBundleCommands {
Inspect(SupportBundleInspectArgs),
}

// Arguments for the `trust-quorum` command: a wrapper that only carries the
// nested subcommand to run.
//
// NOTE(review): plain `//` comments are used here on purpose; elsewhere in
// this file `///` comments on clap-derived items show up as help text.
#[derive(Debug, Args)]
struct TrustQuorumArgs {
// The trust quorum operation selected on the command line.
#[command(subcommand)]
command: TrustQuorumCommands,
}

// Subcommands available under the `trust-quorum` command.
//
// The `///` comments on the variants are user-visible: clap renders them as
// the one-line help for each subcommand, matching the other command enums
// in this file.
#[derive(Debug, Subcommand)]
enum TrustQuorumCommands {
    /// show the trust quorum configuration for a rack at a given epoch
    GetConfig(TrustQuorumConfigArgs),
    /// start an LRTQ upgrade (destructive operation)
    LrtqUpgrade,
}

// Positional arguments identifying which trust quorum configuration to show.
//
// The `///` comments on the fields are user-visible: clap renders them as the
// help text for each argument.
#[derive(Debug, Clone, Copy, Args)]
struct TrustQuorumConfigArgs {
    /// ID of the rack whose trust quorum configuration should be shown
    rack_id: RackUuid,
    /// epoch of the configuration to show: an unsigned integer, or "latest"
    /// (also accepted as "current") for the latest configuration
    epoch: TrustQuorumEpochOrLatest,
}

/// Selects which trust quorum configuration a command refers to: the latest
/// one, or the one at an explicitly given epoch.
///
/// Values are produced from command-line text by the `FromStr` impl below.
#[derive(Debug, Clone, Copy)]
pub(crate) enum TrustQuorumEpochOrLatest {
/// No explicit epoch; parsed from the strings `latest` or `current`.
Latest,
/// A specific epoch; parsed from any other string as a `u64`.
Epoch(Epoch),
}

impl FromStr for TrustQuorumEpochOrLatest {
    type Err = ParseIntError;

    /// Parse `latest` or `current` as [`Self::Latest`]; parse anything else
    /// as a `u64` epoch number.
    ///
    /// # Errors
    ///
    /// Returns the `ParseIntError` from `u64` parsing when the input is
    /// neither of the two keywords nor a valid unsigned integer.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "latest" | "current" => Ok(Self::Latest),
            number => number.parse::<u64>().map(|n| Self::Epoch(Epoch(n))),
        }
    }
}

#[derive(Debug, Args)]
struct SupportBundleDeleteArgs {
id: SupportBundleUuid,
Expand Down Expand Up @@ -860,6 +902,15 @@ impl NexusArgs {
NexusCommands::SupportBundles(SupportBundleArgs {
command: SupportBundleCommands::Inspect(args),
}) => cmd_nexus_support_bundles_inspect(&client, args).await,
NexusCommands::TrustQuorum(TrustQuorumArgs {
command: TrustQuorumCommands::GetConfig(args),
}) => cmd_nexus_trust_quorum_get_config(&client, args).await,
NexusCommands::TrustQuorum(TrustQuorumArgs {
command: TrustQuorumCommands::LrtqUpgrade,
}) => {
let token = omdb.check_allow_destructive()?;
cmd_nexus_trust_quorum_lrtq_upgrade(&client, token).await
}
NexusCommands::UpdateStatus(args) => {
cmd_nexus_update_status(&client, args).await
}
Expand Down Expand Up @@ -4452,6 +4503,55 @@ async fn cmd_nexus_support_bundles_list(
Ok(())
}

/// Fetches a trust quorum configuration from Nexus and pretty-prints it.
///
/// When `args.epoch` is `Latest`, no epoch is sent with the request;
/// otherwise the numeric epoch is passed along. Any client error is returned
/// with context naming the rack (and the epoch, when one was given).
async fn cmd_nexus_trust_quorum_get_config(
    client: &nexus_lockstep_client::Client,
    args: &TrustQuorumConfigArgs,
) -> Result<(), anyhow::Error> {
    let rack_id = args.rack_id;
    let rack_uuid = rack_id.as_untyped_uuid();

    let response = match args.epoch {
        TrustQuorumEpochOrLatest::Latest => {
            let result =
                client.trust_quorum_get_config(rack_uuid, None).await;
            result.with_context(|| {
                format!(
                    "getting latest trust quorum config for rack {}",
                    rack_id
                )
            })?
        }
        TrustQuorumEpochOrLatest::Epoch(epoch) => {
            let result = client
                .trust_quorum_get_config(rack_uuid, Some(epoch.0))
                .await;
            result.with_context(|| {
                format!(
                    "getting trust quorum config for rack {}, epoch {}",
                    rack_id, epoch
                )
            })?
        }
    };

    let config = response.into_inner();
    println!("{config:#?}");

    Ok(())
}

/// Asks Nexus to start an LRTQ upgrade and prints the epoch it returns.
///
/// The `DestructiveOperationToken` is not read; requiring it in the signature
/// means a caller must already hold one before it can invoke this command.
async fn cmd_nexus_trust_quorum_lrtq_upgrade(
    client: &nexus_lockstep_client::Client,
    _destruction_token: DestructiveOperationToken,
) -> Result<(), anyhow::Error> {
    let response =
        client.trust_quorum_lrtq_upgrade().await.context("lrtq upgrade")?;
    let epoch = response.into_inner();

    println!("Started LRTQ upgrade at epoch {epoch}");

    Ok(())
}

/// Runs `omdb nexus support-bundles create`
async fn cmd_nexus_support_bundles_create(
client: &nexus_lockstep_client::Client,
Expand Down
1 change: 1 addition & 0 deletions dev-tools/omdb/tests/usage_errors.out
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,7 @@ Commands:
sagas view sagas, create and complete demo sagas
sleds interact with sleds
support-bundles interact with support bundles [aliases: sb]
trust-quorum interact with the trust quorum
update-status show running artifact versions
help Print this message or the help of the given subcommand(s)

Expand Down
59 changes: 56 additions & 3 deletions nexus/db-queries/src/db/datastore/trust_quorum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use nexus_db_model::DbTypedUuid;
use nexus_db_model::HwBaseboardId;
use nexus_db_model::TrustQuorumConfiguration as DbTrustQuorumConfiguration;
use nexus_db_model::TrustQuorumMember as DbTrustQuorumMember;
use nexus_types::trust_quorum::IsLrtqUpgrade;
use nexus_types::trust_quorum::ProposedTrustQuorumConfig;
use nexus_types::trust_quorum::{
TrustQuorumConfig, TrustQuorumConfigState, TrustQuorumMemberData,
Expand Down Expand Up @@ -435,10 +436,17 @@ impl DataStore {
)
.await?;

// Ensure that epochs are sequential
// Ensure that epochs are sequential or this is the initial attempt at an
// LRTQ upgrade.
//
// In the latter case the proposed epoch will be 2, as LRTQ has an epoch
// of 1 that is encoded as a ZFS dataset property.
let latest_epoch = latest_config.as_ref().map(|c| c.epoch);
bail_unless!(
latest_epoch == proposed.epoch.previous(),
latest_epoch == proposed.epoch.previous()
|| (latest_epoch.is_none()
&& proposed.is_lrtq_upgrade == IsLrtqUpgrade::Yes
&& proposed.epoch == Epoch(2)),
"Epochs for trust quorum configurations must be sequential. \
Current epoch = {:?}, Proposed Epoch = {:?}",
latest_epoch,
Expand Down Expand Up @@ -1537,7 +1545,7 @@ mod tests {
.await
.unwrap();

// Last committed epoch is incoreect (should be 1)
// Last committed epoch is incorrect (should be 1)
let bad_config = ProposedTrustQuorumConfig {
rack_id,
epoch: Epoch(2),
Expand Down Expand Up @@ -1595,6 +1603,51 @@ mod tests {
logctx.cleanup_successful();
}

/// Inserting an LRTQ-upgrade proposal at epoch 2 must succeed when the rack
/// has no prior trust quorum configuration, and the stored configuration must
/// come back in the `PreparingLrtqUpgrade` state with every member unacked.
#[tokio::test]
async fn test_tq_insert_initial_lrtq_upgrade() {
    // The log context is named after this test so its log output is not
    // attributed to `test_tq_update_prepare_and_commit`.
    let logctx = test_setup_log("test_tq_insert_initial_lrtq_upgrade");
    let db = TestDatabase::new_with_datastore(&logctx.log).await;
    let (opctx, datastore) = (db.opctx(), db.datastore());

    let hw_ids = insert_hw_baseboard_ids(&db).await;
    let rack_id = RackUuid::new_v4();
    let members: BTreeSet<_> =
        hw_ids.iter().cloned().map(BaseboardId::from).collect();

    // Propose an LRTQ upgrade and successfully insert it
    let config = ProposedTrustQuorumConfig {
        rack_id,
        epoch: Epoch(2),
        is_lrtq_upgrade: IsLrtqUpgrade::Yes,
        members,
    };

    // Insert should succeed
    datastore.tq_insert_latest_config(opctx, config.clone()).await.unwrap();

    // Read the config back and check that it's preparing for LRTQ upgrade
    // with no acks.
    let read_config = datastore
        .tq_get_latest_config(opctx, rack_id)
        .await
        .expect("no error")
        .expect("returned config");

    // The read config should be preparing
    assert_eq!(read_config.epoch, config.epoch);
    assert_eq!(
        read_config.state,
        TrustQuorumConfigState::PreparingLrtqUpgrade
    );
    assert!(read_config.encrypted_rack_secrets.is_none());
    assert!(read_config.members.iter().all(|(_, info)| {
        info.state == TrustQuorumMemberState::Unacked
    }));

    db.terminate().await;
    logctx.cleanup_successful();
}

#[tokio::test]
async fn test_tq_update_prepare_and_commit() {
let logctx = test_setup_log("test_tq_update_prepare_and_commit");
Expand Down
1 change: 1 addition & 0 deletions nexus/lockstep-api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ omicron-uuid-kinds.workspace = true
omicron-workspace-hack.workspace = true
schemars.workspace = true
serde.workspace = true
trust-quorum-types.workspace = true
uuid.workspace = true
30 changes: 25 additions & 5 deletions nexus/lockstep-api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ use omicron_uuid_kinds::*;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use trust_quorum_types::types::Epoch;
use uuid::Uuid;

const RACK_INITIALIZATION_REQUEST_MAX_BYTES: usize = 10 * 1024 * 1024;
Expand Down Expand Up @@ -564,15 +565,29 @@ pub trait NexusLockstepApi {
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<QuiesceStatus>, HttpError>;

/// Retrieve the latest ongoing rack cluster membership change
/// Retrieve the trust quorum configuration for the given epoch, or latest
/// if no epoch is given
#[endpoint {
method = GET,
path = "/trust-quorum/{rack_id}/config/latest",
path = "/trust-quorum/config/{rack_id}",
}]
async fn trust_quorum_get_latest_config(
async fn trust_quorum_get_config(
rqctx: RequestContext<Self::Context>,
path_params: Path<RackPathParam>,
) -> Result<HttpResponseOk<Option<TrustQuorumConfig>>, HttpError>;
path_params: Path<params::RackMembershipConfigPathParams>,
query_params: Query<TrustQuorumEpochQueryParam>,
) -> Result<HttpResponseOk<TrustQuorumConfig>, HttpError>;

/// Initiate an LRTQ upgrade
///
/// Return the epoch of the proposed configuration, so it can be polled
/// asynchronously.
#[endpoint {
method = POST,
path = "/trust-quorum/lrtq-upgrade"
}]
async fn trust_quorum_lrtq_upgrade(
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<Epoch>, HttpError>;
}

/// Path parameters for Rack requests.
Expand Down Expand Up @@ -621,3 +636,8 @@ pub struct SledId {
pub struct VersionPathParam {
pub version: u32,
}

// Query parameters accepted by the trust quorum config endpoint.
//
// NOTE(review): written as `//` rather than `///` so that nothing is added to
// the schema derived via `JsonSchema` — confirm before converting to doc
// comments.
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
pub struct TrustQuorumEpochQueryParam {
// Epoch of the configuration to retrieve. Optional: the endpoint's docs say
// the latest configuration is returned when no epoch is given.
pub epoch: Option<Epoch>,
}
Loading
Loading