Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 36 additions & 1 deletion bootstore/src/schemes/v0/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
//! 2. A network config blob required for pre-rack-unlock configuration
//!

use crate::schemes::v0::SharePkgCommon;

use super::{Fsm, FsmConfig, State};
use camino::Utf8PathBuf;
use omicron_common::ledger::{Ledger, Ledgerable};
use serde::{Deserialize, Serialize};
use sled_hardware_types::Baseboard;
use slog::{Logger, info};
use slog::{Logger, info, warn};

/// A persistent version of `Fsm::State`
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
Expand Down Expand Up @@ -84,6 +86,39 @@ impl PersistentFsmState {
(Fsm::new_uninitialized(node_id, config), 0)
}
}

/// Read the persisted LRTQ state from the ledger stored at `paths` and hand
/// back the common share package data for use by trust quorum.
///
/// Returns `None` when no ledger could be loaded, or when the persisted
/// state is `Uninitialized` or `Learning` (a warning is logged in that case,
/// since no share is available).
pub async fn load_for_trust_quorum(
    log: &Logger,
    paths: Vec<Utf8PathBuf>,
) -> Option<SharePkgCommon> {
    // Without a ledger there is nothing to return.
    let ledger = Ledger::<PersistentFsmState>::new(log, paths).await?;
    let persistent_state = ledger.into_inner();
    info!(
        log,
        "Loaded LRTQ PersistentFsmState from ledger in state {} with generation {}",
        persistent_state.state.name(),
        persistent_state.generation
    );

    match &persistent_state.state {
        // Both of these states carry a package whose `common` part is
        // what the caller wants.
        State::InitialMember { pkg, .. } => Some(pkg.common.clone()),
        State::Learned { pkg } => Some(pkg.common.clone()),
        State::Uninitialized | State::Learning => {
            warn!(
                log,
                "Unexpected LRTQ state: {}. No share available.",
                persistent_state.state.name()
            );
            None
        }
    }
}
}

/// Network configuration required to bring up the control plane
Expand Down
1 change: 1 addition & 0 deletions clients/nexus-lockstep-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ progenitor::generate_api!(
ReconfiguratorConfigView = nexus_types::deployment::ReconfiguratorConfigView,
RecoverySiloConfig = sled_agent_types_versions::latest::rack_init::RecoverySiloConfig,
SledAgentUpdateStatus = nexus_types::internal_api::views::SledAgentUpdateStatus,
TrustQuorumConfig = nexus_types::trust_quorum::TrustQuorumConfig,
UpdateStatus = nexus_types::internal_api::views::UpdateStatus,
ZoneStatus = nexus_types::internal_api::views::ZoneStatus,
ZpoolName = omicron_common::zpool_name::ZpoolName,
Expand Down
1 change: 1 addition & 0 deletions clients/sled-agent-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ progenitor::generate_api!(
Inventory = sled_agent_types_versions::latest::inventory::Inventory,
InventoryDisk = sled_agent_types_versions::latest::inventory::InventoryDisk,
InventoryZpool = sled_agent_types_versions::latest::inventory::InventoryZpool,
LrtqUpgradeMsg = trust_quorum_types::messages::LrtqUpgradeMsg,
MacAddr = omicron_common::api::external::MacAddr,
MupdateOverrideBootInventory = sled_agent_types_versions::latest::inventory::MupdateOverrideBootInventory,
Name = omicron_common::api::external::Name,
Expand Down
1 change: 1 addition & 0 deletions dev-tools/omdb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ supports-color.workspace = true
tabled.workspace = true
textwrap.workspace = true
tokio = { workspace = true, features = ["full"] }
trust-quorum-types.workspace = true
tufaceous-artifact.workspace = true
unicode-width.workspace = true
update-engine.workspace = true
Expand Down
102 changes: 102 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ use omicron_uuid_kinds::DemoSagaUuid;
use omicron_uuid_kinds::GenericUuid;
use omicron_uuid_kinds::ParseError;
use omicron_uuid_kinds::PhysicalDiskUuid;
use omicron_uuid_kinds::RackUuid;
use omicron_uuid_kinds::SledUuid;
use omicron_uuid_kinds::SupportBundleUuid;
use quiesce::QuiesceArgs;
Expand All @@ -97,6 +98,7 @@ use slog_error_chain::InlineErrorChain;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::fs::OpenOptions;
use std::num::ParseIntError;
use std::os::unix::fs::PermissionsExt;
use std::str::FromStr;
use std::sync::Arc;
Expand All @@ -108,6 +110,7 @@ use tabled::settings::Padding;
use tabled::settings::object::Columns;
use tokio::io::AsyncWriteExt;
use tokio::sync::OnceCell;
use trust_quorum_types::types::Epoch;
use update_engine::EventBuffer;
use update_engine::ExecutionStatus;
use update_engine::ExecutionTerminalInfo;
Expand Down Expand Up @@ -165,6 +168,8 @@ enum NexusCommands {
/// interact with support bundles
#[command(visible_alias = "sb")]
SupportBundles(SupportBundleArgs),
/// interact with the trust quorum
TrustQuorum(TrustQuorumArgs),
/// show running artifact versions
UpdateStatus(UpdateStatusArgs),
}
Expand Down Expand Up @@ -566,6 +571,43 @@ enum SupportBundleCommands {
Inspect(SupportBundleInspectArgs),
}

// Arguments for `omdb nexus trust-quorum`; the chosen subcommand is carried in
// `command`.
//
// Plain `//` comments are used on purpose: `///` doc comments on clap-derived
// items become part of the generated help output.
#[derive(Debug, Args)]
struct TrustQuorumArgs {
#[command(subcommand)]
command: TrustQuorumCommands,
}

// Subcommands available under `omdb nexus trust-quorum`.
//
// Plain `//` comments are used on purpose: `///` doc comments on clap-derived
// items become part of the generated help output.
#[derive(Debug, Subcommand)]
enum TrustQuorumCommands {
// Fetch and print a trust quorum configuration for a rack, either for a
// specific epoch or the latest one.
GetConfig(TrustQuorumConfigArgs),
// Ask Nexus to start an LRTQ upgrade. The dispatcher requires the
// destructive-operation check to pass before running this.
LrtqUpgrade,
}

// Arguments for the `GetConfig` trust quorum subcommand.
//
// Plain `//` comments are used on purpose: `///` doc comments on clap-derived
// items become part of the generated help output.
#[derive(Debug, Clone, Copy, Args)]
struct TrustQuorumConfigArgs {
// Rack whose trust quorum configuration is requested.
rack_id: RackUuid,
// Which configuration to fetch; parsed through the `FromStr` impl of
// `TrustQuorumEpochOrLatest`.
epoch: TrustQuorumEpochOrLatest,
}

/// Selects which trust quorum configuration to look up: the most recent one
/// or the one at an explicit epoch.
#[derive(Debug, Clone, Copy)]
pub(crate) enum TrustQuorumEpochOrLatest {
/// Use the latest configuration (no epoch is sent with the request).
Latest,
/// Use the configuration at exactly this epoch.
Epoch(Epoch),
}

/// Parses the command-line spelling of an epoch selector.
///
/// The exact strings `"latest"` and `"current"` select
/// [`TrustQuorumEpochOrLatest::Latest`]; anything else must parse as a `u64`
/// and becomes [`TrustQuorumEpochOrLatest::Epoch`].
impl FromStr for TrustQuorumEpochOrLatest {
    type Err = ParseIntError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "latest" | "current" => Ok(Self::Latest),
            // Everything else is treated as a numeric epoch; a parse
            // failure is reported to the caller unchanged.
            digits => digits.parse::<u64>().map(|raw| Self::Epoch(Epoch(raw))),
        }
    }
}

#[derive(Debug, Args)]
struct SupportBundleDeleteArgs {
id: SupportBundleUuid,
Expand Down Expand Up @@ -860,6 +902,15 @@ impl NexusArgs {
NexusCommands::SupportBundles(SupportBundleArgs {
command: SupportBundleCommands::Inspect(args),
}) => cmd_nexus_support_bundles_inspect(&client, args).await,
NexusCommands::TrustQuorum(TrustQuorumArgs {
command: TrustQuorumCommands::GetConfig(args),
}) => cmd_nexus_trust_quorum_get_config(&client, args).await,
NexusCommands::TrustQuorum(TrustQuorumArgs {
command: TrustQuorumCommands::LrtqUpgrade,
}) => {
let token = omdb.check_allow_destructive()?;
cmd_nexus_trust_quorum_lrtq_upgrade(&client, token).await
}
NexusCommands::UpdateStatus(args) => {
cmd_nexus_update_status(&client, args).await
}
Expand Down Expand Up @@ -4452,6 +4503,57 @@ async fn cmd_nexus_support_bundles_list(
Ok(())
}

/// Runs `omdb nexus trust-quorum get-config`
///
/// Fetches the trust quorum configuration for `args.rack_id` from Nexus,
/// either at the requested epoch or the latest one, and pretty-prints its
/// `Debug` representation to stdout.
async fn cmd_nexus_trust_quorum_get_config(
    client: &nexus_lockstep_client::Client,
    args: &TrustQuorumConfigArgs,
) -> Result<(), anyhow::Error> {
    let rack_id = args.rack_id;
    // `None` means no epoch is sent, which requests the latest config.
    let requested_epoch = match args.epoch {
        TrustQuorumEpochOrLatest::Latest => None,
        TrustQuorumEpochOrLatest::Epoch(epoch) => Some(epoch),
    };

    let response = client
        .trust_quorum_get_config(
            rack_id.as_untyped_uuid(),
            requested_epoch.map(|epoch| epoch.0),
        )
        .await
        // The message is only built if the request fails.
        .with_context(|| match requested_epoch {
            None => format!(
                "getting latest trust quorum config for rack {}",
                rack_id
            ),
            Some(epoch) => format!(
                "getting trust quorum config for rack {}, epoch {}",
                rack_id, epoch
            ),
        })?;
    let config = response.into_inner();

    println!("{config:#?}");

    Ok(())
}

/// Runs `omdb nexus trust-quorum lrtq-upgrade`
///
/// Asks Nexus to start an LRTQ upgrade and prints the epoch returned by the
/// request. The `DestructiveOperationToken` parameter is unused at runtime;
/// requiring it forces callers to obtain one before invoking this command.
async fn cmd_nexus_trust_quorum_lrtq_upgrade(
    client: &nexus_lockstep_client::Client,
    _destruction_token: DestructiveOperationToken,
) -> Result<(), anyhow::Error> {
    // Nexus already knows the rack id, so the user is not asked to type it;
    // a nil UUID just fills the required path parameter.
    let placeholder_rack_id = Uuid::nil();
    let response = client
        .trust_quorum_lrtq_upgrade(&placeholder_rack_id)
        .await
        .context("lrtq upgrade")?;
    let epoch = response.into_inner();

    println!("Started LRTQ upgrade at epoch {epoch}");

    Ok(())
}

/// Runs `omdb nexus support-bundles create`
async fn cmd_nexus_support_bundles_create(
client: &nexus_lockstep_client::Client,
Expand Down
1 change: 1 addition & 0 deletions dev-tools/omdb/tests/usage_errors.out
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,7 @@ Commands:
sagas view sagas, create and complete demo sagas
sleds interact with sleds
support-bundles interact with support bundles [aliases: sb]
trust-quorum interact with the trust quorum
update-status show running artifact versions
help Print this message or the help of the given subcommand(s)

Expand Down
56 changes: 53 additions & 3 deletions nexus/db-queries/src/db/datastore/trust_quorum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use nexus_db_model::DbTypedUuid;
use nexus_db_model::HwBaseboardId;
use nexus_db_model::TrustQuorumConfiguration as DbTrustQuorumConfiguration;
use nexus_db_model::TrustQuorumMember as DbTrustQuorumMember;
use nexus_types::trust_quorum::IsLrtqUpgrade;
use nexus_types::trust_quorum::ProposedTrustQuorumConfig;
use nexus_types::trust_quorum::{
TrustQuorumConfig, TrustQuorumConfigState, TrustQuorumMemberData,
Expand Down Expand Up @@ -435,10 +436,17 @@ impl DataStore {
)
.await?;

// Ensure that epochs are sequential
// Ensure that epochs are sequential or this is the initial attempt at an
// LRTQ upgrade.
//
// In the latter case the proposed epoch will be 2, as LRTQ has an epoch
// of 1 that is encoded as a ZFS dataset property.
let latest_epoch = latest_config.as_ref().map(|c| c.epoch);
bail_unless!(
latest_epoch == proposed.epoch.previous(),
latest_epoch == proposed.epoch.previous()
|| (latest_epoch.is_none()
&& proposed.is_lrtq_upgrade == IsLrtqUpgrade::Yes
&& proposed.epoch == Epoch(2)),
"Epochs for trust quorum configurations must be sequential. \
Current epoch = {:?}, Proposed Epoch = {:?}",
latest_epoch,
Expand Down Expand Up @@ -1537,7 +1545,7 @@ mod tests {
.await
.unwrap();

// Last committed epoch is incoreect (should be 1)
// Last committed epoch is incorrect (should be 1)
let bad_config = ProposedTrustQuorumConfig {
rack_id,
epoch: Epoch(2),
Expand Down Expand Up @@ -1595,6 +1603,48 @@ mod tests {
logctx.cleanup_successful();
}

/// An LRTQ upgrade proposed at epoch 2 with no prior trust quorum
/// configuration must be accepted, and must read back in the
/// `PreparingLrtqUpgrade` state with every member unacked.
#[tokio::test]
async fn test_tq_insert_initial_lrtq_upgrade() {
    // Use this test's own name so its log output does not collide with
    // `test_tq_update_prepare_and_commit`.
    let logctx = test_setup_log("test_tq_insert_initial_lrtq_upgrade");
    let db = TestDatabase::new_with_datastore(&logctx.log).await;
    let (opctx, datastore) = (db.opctx(), db.datastore());

    let hw_ids = insert_hw_baseboard_ids(&db).await;
    let rack_id = RackUuid::new_v4();
    let members: BTreeSet<_> =
        hw_ids.iter().cloned().map(BaseboardId::from).collect();

    // Propose an LRTQ upgrade and successfully insert it
    let config = ProposedTrustQuorumConfig {
        rack_id,
        epoch: Epoch(2),
        is_lrtq_upgrade: IsLrtqUpgrade::Yes,
        members: members.clone(),
    };

    // Insert should succeed
    datastore.tq_insert_latest_config(opctx, config.clone()).await.unwrap();

    // Read the config back and check that it's preparing for LRTQ upgrade
    // with no acks.
    let read_config = datastore
        .tq_get_latest_config(opctx, rack_id)
        .await
        .expect("no error")
        .expect("returned config");

    // The read config should be preparing
    assert_eq!(read_config.epoch, config.epoch);
    assert_eq!(
        read_config.state,
        TrustQuorumConfigState::PreparingLrtqUpgrade
    );
    assert!(read_config.encrypted_rack_secrets.is_none());
    assert!(read_config.members.iter().all(|(_, info)| {
        info.state == TrustQuorumMemberState::Unacked
    }));

    // Shut the test database down and mark the log context as successful.
    db.terminate().await;
    logctx.cleanup_successful();
}

#[tokio::test]
async fn test_tq_update_prepare_and_commit() {
let logctx = test_setup_log("test_tq_update_prepare_and_commit");
Expand Down
1 change: 1 addition & 0 deletions nexus/lockstep-api/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ omicron-uuid-kinds.workspace = true
omicron-workspace-hack.workspace = true
schemars.workspace = true
serde.workspace = true
trust-quorum-types.workspace = true
uuid.workspace = true
29 changes: 25 additions & 4 deletions nexus/lockstep-api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ use omicron_uuid_kinds::*;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use trust_quorum_types::types::Epoch;
use uuid::Uuid;

const RACK_INITIALIZATION_REQUEST_MAX_BYTES: usize = 10 * 1024 * 1024;
Expand Down Expand Up @@ -564,15 +565,30 @@ pub trait NexusLockstepApi {
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<QuiesceStatus>, HttpError>;

/// Retrieve the latest ongoing rack cluster membership change
/// Retrieve the trust quorum configuration for the given epoch, or latest
/// if no epoch is given
#[endpoint {
method = GET,
path = "/trust-quorum/{rack_id}/config/latest",
path = "/trust-quorum/{rack_id}/config",
}]
async fn trust_quorum_get_latest_config(
async fn trust_quorum_get_config(
rqctx: RequestContext<Self::Context>,
path_params: Path<params::RackMembershipConfigPathParams>,
query_params: Query<TrustQuorumEpochQueryParam>,
) -> Result<HttpResponseOk<TrustQuorumConfig>, HttpError>;

/// Initiate an LRTQ upgrade
///
/// Return the epoch of the proposed configuration, so it can be polled
/// asynchronously.
#[endpoint {
method = POST,
path = "/trust-quorum/{rack_id}/lrtq-upgrade"
}]
async fn trust_quorum_lrtq_upgrade(
rqctx: RequestContext<Self::Context>,
path_params: Path<RackPathParam>,
) -> Result<HttpResponseOk<Option<TrustQuorumConfig>>, HttpError>;
) -> Result<HttpResponseOk<Epoch>, HttpError>;
}

/// Path parameters for Rack requests.
Expand Down Expand Up @@ -621,3 +637,8 @@ pub struct SledId {
pub struct VersionPathParam {
pub version: u32,
}

// Query parameters accepted when fetching a trust quorum configuration.
//
// `epoch` is optional. NOTE(review): the `trust_quorum_get_config` endpoint
// docs say the latest configuration is returned when no epoch is given;
// confirm against the handler implementation.
//
// Plain `//` comments are used on purpose: `///` doc comments on a
// `JsonSchema`-derived type would add description text to the generated
// schema.
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
pub struct TrustQuorumEpochQueryParam {
pub epoch: Option<Epoch>,
}
Loading
Loading