diff --git a/nexus/db-model/src/db_metadata.rs b/nexus/db-model/src/db_metadata.rs
index de7e2862eb7..080da4d423c 100644
--- a/nexus/db-model/src/db_metadata.rs
+++ b/nexus/db-model/src/db_metadata.rs
@@ -3,8 +3,14 @@
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
 use crate::SemverVersion;
+use crate::impl_enum_type;
+use crate::typed_uuid::DbTypedUuid;
 use chrono::{DateTime, Utc};
 use nexus_db_schema::schema::db_metadata;
+use nexus_db_schema::schema::db_metadata_nexus;
+use omicron_uuid_kinds::{
+    BlueprintKind, BlueprintUuid, OmicronZoneKind, OmicronZoneUuid,
+};
 use serde::{Deserialize, Serialize};
 
 /// Internal database metadata
@@ -33,3 +39,47 @@ impl DbMetadata {
         &self.version
     }
 }
+
+impl_enum_type!(
+    DbMetadataNexusStateEnum:
+
+    #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Serialize, Deserialize)]
+    pub enum DbMetadataNexusState;
+
+    // Enum values
+    Active => b"active"
+    NotYet => b"not_yet"
+    Quiesced => b"quiesced"
+);
+
+#[derive(
+    Queryable, Insertable, Debug, Clone, Selectable, Serialize, Deserialize,
+)]
+#[diesel(table_name = db_metadata_nexus)]
+pub struct DbMetadataNexus {
+    nexus_id: DbTypedUuid<OmicronZoneKind>,
+    last_drained_blueprint_id: Option<DbTypedUuid<BlueprintKind>>,
+    state: DbMetadataNexusState,
+}
+
+impl DbMetadataNexus {
+    pub fn new(nexus_id: OmicronZoneUuid, state: DbMetadataNexusState) -> Self {
+        Self {
+            nexus_id: nexus_id.into(),
+            last_drained_blueprint_id: None,
+            state,
+        }
+    }
+
+    pub fn state(&self) -> DbMetadataNexusState {
+        self.state
+    }
+
+    pub fn nexus_id(&self) -> OmicronZoneUuid {
+        self.nexus_id.into()
+    }
+
+    pub fn last_drained_blueprint_id(&self) -> Option<BlueprintUuid> {
+        self.last_drained_blueprint_id.map(|id| id.into())
+    }
+}
diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index fd8fc89b3bb..37cd2f5d77e 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock};
 ///
 /// This must be updated when you change the database schema. Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: Version = Version::new(183, 0, 0);
+pub const SCHEMA_VERSION: Version = Version::new(184, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| {
     // |  leaving the first copy as an example for the next person.
     // v
     // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
+    KnownVersion::new(184, "populate-db-metadata-nexus"),
     KnownVersion::new(183, "add-ip-version-to-pools"),
     KnownVersion::new(182, "add-tuf-artifact-board"),
     KnownVersion::new(181, "rename-nat-table"),
@@ -227,6 +228,9 @@ static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| {
 /// The earliest supported schema version.
 pub const EARLIEST_SUPPORTED_VERSION: Version = Version::new(1, 0, 0);
 
+/// The version where "db_metadata_nexus" was added.
+pub const DB_METADATA_NEXUS_SCHEMA_VERSION: Version = Version::new(184, 0, 0);
+
 /// Describes one version of the database schema
 #[derive(Debug, Clone)]
 struct KnownVersion {
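A note on the three states, since the rest of this change leans on them: `active` means a Nexus may use the database, `not_yet` means it is waiting for handoff from the previous generation, and `quiesced` means it has permanently surrendered access. A hedged sketch of the lifecycle implied by this diff (illustrative only, not code from the change):

```rust
// Illustrative sketch of the db_metadata_nexus lifecycle implied by this
// change. Handoff promotes every `not_yet` record to `active`; quiescing
// (handled elsewhere) is how an outgoing Nexus surrenders access.
#[derive(Clone, Copy, PartialEq, Debug)]
enum State {
    Active,
    NotYet,
    Quiesced,
}

fn transition_allowed(from: State, to: State) -> bool {
    use State::*;
    matches!(
        (from, to),
        (NotYet, Active)     // promoted during handoff
        | (Active, Quiesced) // outgoing Nexus drains and steps aside
    )
}
```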
diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs
index dbc1de58571..55094660d0e 100644
--- a/nexus/db-queries/src/db/datastore/db_metadata.rs
+++ b/nexus/db-queries/src/db/datastore/db_metadata.rs
@@ -4,23 +4,79 @@
 //! [`DataStore`] methods on Database Metadata.
 
-use super::DataStore;
+use super::{DataStore, DbConnection, IdentityCheckPolicy};
+use crate::authz;
+use crate::context::OpContext;
+
 use anyhow::{Context, bail, ensure};
 use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection};
 use chrono::Utc;
 use diesel::prelude::*;
+use futures::FutureExt;
 use nexus_db_errors::ErrorHandler;
+use nexus_db_errors::OptionalError;
 use nexus_db_errors::public_error_from_diesel;
 use nexus_db_model::AllSchemaVersions;
+use nexus_db_model::DB_METADATA_NEXUS_SCHEMA_VERSION;
+use nexus_db_model::DbMetadataNexus;
+use nexus_db_model::DbMetadataNexusState;
 use nexus_db_model::EARLIEST_SUPPORTED_VERSION;
 use nexus_db_model::SchemaUpgradeStep;
 use nexus_db_model::SchemaVersion;
+use nexus_types::deployment::BlueprintZoneDisposition;
 use omicron_common::api::external::Error;
+use omicron_uuid_kinds::GenericUuid;
+use omicron_uuid_kinds::OmicronZoneUuid;
 use semver::Version;
 use slog::{Logger, error, info, o};
 use std::ops::Bound;
 use std::str::FromStr;
 
+/// Errors that can occur during handoff operations
+#[derive(Debug, thiserror::Error)]
+pub enum HandoffError {
+    #[error(
+        "Cannot perform handoff: \
+        {active_count} Nexus instance(s) are still active. \
+        All instances must be quiesced or not_yet before handoff can proceed."
+    )]
+    ActiveNexusInstancesExist { active_count: u32 },
+
+    #[error(
+        "Cannot perform handoff: \
+        Nexus {nexus_id} does not have a record in db_metadata_nexus table. \
+        This Nexus must be registered before it can become active."
+    )]
+    NexusNotRegistered { nexus_id: OmicronZoneUuid },
+
+    #[error(
+        "Cannot perform handoff: \
+        Nexus {nexus_id} is in state {current_state:?}. \
+        Must be in 'not_yet' state to become active."
+    )]
+    NexusInWrongState {
+        nexus_id: OmicronZoneUuid,
+        current_state: DbMetadataNexusState,
+    },
+}
+
+impl From<HandoffError> for Error {
+    fn from(err: HandoffError) -> Self {
+        use HandoffError::*;
+        match err {
+            // These conditions are all errors that may occur transiently, with
+            // handoff from old -> new Nexus, or with multiple Nexuses
+            // concurrently attempting to perform the handoff operation.
+            //
+            // As a result, each returns a "503" error indicating that a retry
+            // should be attempted.
+            ActiveNexusInstancesExist { .. }
+            | NexusNotRegistered { .. }
+            | NexusInWrongState { .. } => Error::unavail(&err.to_string()),
+        }
+    }
+}
+
 // A SchemaVersion which uses a pre-release value to indicate
 // "incremental progress".
 //
@@ -99,33 +155,302 @@ fn skippable_version(
     return false;
 }
 
+/// Describes the state of the database access with respect to this Nexus
+#[derive(Debug, Copy, Clone, PartialEq)]
+enum NexusAccess {
+    /// Nexus does not yet have access to the database.
+    DoesNotHaveAccessYet { nexus_id: OmicronZoneUuid },
+
+    /// Nexus has been explicitly locked out of the database.
+    LockedOut,
+
+    /// Nexus should have normal access to the database
+    ///
+    /// We have a record of this Nexus, and it should have access.
+    HasExplicitAccess,
+
+    /// Nexus should have normal access to the database
+    ///
+    /// We may or may not have a record of this Nexus, but it should have
+    /// access.
+    HasImplicitAccess,
+}
+
+/// Describes the state of the schema with respect to this Nexus
+#[derive(Debug, Copy, Clone, PartialEq)]
+enum SchemaStatus {
+    /// The database schema matches what we want
+    UpToDate,
+
+    /// The database schema is newer than what we want
+    NewerThanDesired,
+
+    /// The database schema is older than what we want
+    OlderThanDesired,
+
+    /// The database schema is older than what we want, and it's
+    /// so old, it does not know about the "db_metadata_nexus" table.
+    ///
+    /// We should avoid accessing the "db_metadata_nexus" tables to check
+    /// access, because the schema for these tables may not exist.
+    ///
+    /// TODO: This may be removed, once we're confident deployed systems
+    /// have upgraded past DB_METADATA_NEXUS_SCHEMA_VERSION.
+    OlderThanDesiredSkipAccessCheck,
+}
+
+/// Describes what setup is necessary for DataStore creation
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum DatastoreSetupAction {
+    /// Normal operation: The database is ready for usage
+    Ready,
+
+    /// Not ready for usage yet
+    ///
+    /// The database may be ready for usage once handoff has completed.
+    /// The `nexus_id` here may attempt to take over the database if it has
+    /// a `db_metadata_nexus` record of "not_yet", and all other records
+    /// are either "not_yet" or "quiesced".
+    NeedsHandoff { nexus_id: OmicronZoneUuid },
+
+    /// Start a schema update
+    Update,
+
+    /// Refuse to use the database
+    Refuse,
+}
+
+/// Commitment that the database is willing to perform a [DatastoreSetupAction]
+/// to a desired schema [Version].
+///
+/// Can be created through [DataStore::check_schema_and_access]
+#[derive(Clone)]
+pub struct ValidatedDatastoreSetupAction {
+    action: DatastoreSetupAction,
+    desired: Version,
+}
+
+impl ValidatedDatastoreSetupAction {
+    pub fn action(&self) -> &DatastoreSetupAction {
+        &self.action
+    }
+
+    pub fn desired_version(&self) -> &Version {
+        &self.desired
+    }
+}
+
+impl DatastoreSetupAction {
+    // Interprets the combination of access and status to decide what action
+    // should be taken.
+    fn new(access: NexusAccess, status: SchemaStatus) -> Self {
+        use NexusAccess::*;
+        use SchemaStatus::*;
+
+        match (access, status) {
+            // Nexus has been explicitly locked out of using the database
+            (LockedOut, _) => Self::Refuse,
+
+            // The schema updated beyond what we want, do not use it.
+            (_, NewerThanDesired) => Self::Refuse,
+
+            // If we don't have access yet, but could do something once handoff
+            // occurs, then handoff is needed
+            (
+                DoesNotHaveAccessYet { nexus_id },
+                UpToDate | OlderThanDesired | OlderThanDesiredSkipAccessCheck,
+            ) => Self::NeedsHandoff { nexus_id },
+
+            // This is the most "normal" case: Nexus should have access to the
+            // database, and the schema matches what it wants.
+            (HasExplicitAccess | HasImplicitAccess, UpToDate) => Self::Ready,
+
+            // If this Nexus is allowed to access the schema, but it looks
+            // older than what we expect, we'll need to update the schema to
+            // use it.
+            (
+                HasExplicitAccess | HasImplicitAccess,
+                OlderThanDesired | OlderThanDesiredSkipAccessCheck,
+            ) => Self::Update,
+        }
+    }
+}
+
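Since `DatastoreSetupAction::new` is a pure function of the two enums above, its decision table is easy to pin down with a table-driven test. A hedged sketch that could live in this module's test code (the access and status enums are module-private, so it would have to):

```rust
#[test]
fn setup_action_decision_table() {
    use NexusAccess::*;
    use SchemaStatus::*;
    let nexus_id = OmicronZoneUuid::new_v4();

    // Lockout and a newer-than-desired schema always refuse.
    assert_eq!(
        DatastoreSetupAction::new(LockedOut, UpToDate),
        DatastoreSetupAction::Refuse,
    );
    assert_eq!(
        DatastoreSetupAction::new(HasExplicitAccess, NewerThanDesired),
        DatastoreSetupAction::Refuse,
    );
    // No access yet: wait for handoff, even if an update is also pending.
    assert_eq!(
        DatastoreSetupAction::new(
            DoesNotHaveAccessYet { nexus_id },
            OlderThanDesired,
        ),
        DatastoreSetupAction::NeedsHandoff { nexus_id },
    );
    // Access plus a matching schema is the steady state.
    assert_eq!(
        DatastoreSetupAction::new(HasImplicitAccess, UpToDate),
        DatastoreSetupAction::Ready,
    );
}
```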
 impl DataStore {
-    // Ensures that the database schema matches "desired_version".
-    //
-    // - Updating the schema makes the database incompatible with older
-    //   versions of Nexus, which are not running "desired_version".
-    // - This is a one-way operation that cannot be undone.
-    // - The caller is responsible for ensuring that the new version is valid,
-    //   and that all running Nexus instances can understand the new schema
-    //   version.
-    //
-    // TODO: This function assumes that all concurrently executing Nexus
-    // instances on the rack are operating on the same version of software.
-    // If that assumption is broken, nothing would stop a "new deployment"
-    // from making a change that invalidates the queries used by an "old
-    // deployment".
-    pub async fn ensure_schema(
+    // Checks if the specified Nexus has access to the database.
+    async fn check_nexus_access(
         &self,
-        log: &Logger,
+        nexus_id: OmicronZoneUuid,
+    ) -> Result<NexusAccess, Error> {
+        // Check if any "db_metadata_nexus" rows exist. If they don't,
+        // treat this Nexus as having access.
+        //
+        // This handles the case for:
+        // - Fresh deployments where RSS hasn't populated the table yet (we need
+        //   access to finish "rack_initialization").
+        // - Systems that haven't been migrated to include nexus access control
+        //   (we need access to the database to backfill these records).
+        //
+        // After initialization/migration, this conditional should never trigger
+        // again.
+        let any_records_exist = self.database_nexus_access_any_exist().await?;
+        if !any_records_exist {
+            warn!(
+                &self.log,
+                "No db_metadata_nexus records exist - skipping access check";
+                "nexus_id" => ?nexus_id,
+                "explanation" => "This is expected during initial deployment \
+                    or before migration"
+            );
+            return Ok(NexusAccess::HasImplicitAccess);
+        }
+
+        // Records exist, so enforce the access control check
+        let Some(state) =
+            self.database_nexus_access(nexus_id).await?.map(|s| s.state())
+        else {
+            let msg = "Nexus does not have access to the database (no \
+                db_metadata_nexus record)";
+            warn!(&self.log, "{msg}"; "nexus_id" => ?nexus_id);
+            return Ok(NexusAccess::DoesNotHaveAccessYet { nexus_id });
+        };
+
+        let status = match state {
+            DbMetadataNexusState::Active => {
+                info!(
+                    &self.log,
+                    "Nexus has access to the database";
+                    "nexus_id" => ?nexus_id
+                );
+                NexusAccess::HasExplicitAccess
+            }
+            DbMetadataNexusState::NotYet => {
+                info!(
+                    &self.log,
+                    "Nexus does not yet have access to the database";
+                    "nexus_id" => ?nexus_id
+                );
+                NexusAccess::DoesNotHaveAccessYet { nexus_id }
+            }
+            DbMetadataNexusState::Quiesced => {
+                let msg = "Nexus locked out of database access (quiesced)";
+                error!(&self.log, "{msg}"; "nexus_id" => ?nexus_id);
+                NexusAccess::LockedOut
+            }
+        };
+        Ok(status)
+    }
+
+    // Checks the schema against a desired version.
+    async fn check_schema(
+        &self,
+        desired_version: Version,
+    ) -> Result<SchemaStatus, anyhow::Error> {
+        let (found_version, _found_target_version) = self
+            .database_schema_version()
+            .await
+            .context("Cannot read database schema version")?;
+
+        let log = self.log.new(o!(
+            "found_version" => found_version.to_string(),
+            "desired_version" => desired_version.to_string(),
+        ));
+
+        use std::cmp::Ordering;
+        match found_version.cmp(&desired_version) {
+            Ordering::Less => {
+                warn!(log, "Found schema version is older than desired");
+                if found_version < DB_METADATA_NEXUS_SCHEMA_VERSION {
+                    Ok(SchemaStatus::OlderThanDesiredSkipAccessCheck)
+                } else {
+                    Ok(SchemaStatus::OlderThanDesired)
+                }
+            }
+            Ordering::Equal => {
+                info!(log, "Database schema version is up to date");
+                Ok(SchemaStatus::UpToDate)
+            }
+            Ordering::Greater => {
+                error!(log, "Found schema version is newer than desired");
+                Ok(SchemaStatus::NewerThanDesired)
+            }
+        }
+    }
+
+    /// Compares the state of the schema with the expectations of the
+    /// currently running Nexus.
+    ///
+    /// - `identity_check`: Describes whether or not the identity of the
+    ///   calling Nexus should be validated before returning database access
+    /// - `desired_version`: The version of the database schema this
+    ///   Nexus wants.
+    pub async fn check_schema_and_access(
+        &self,
+        identity_check: IdentityCheckPolicy,
         desired_version: Version,
+    ) -> Result<ValidatedDatastoreSetupAction, anyhow::Error> {
+        let schema_status = self.check_schema(desired_version.clone()).await?;
+
+        let nexus_access = match identity_check {
+            IdentityCheckPolicy::CheckAndTakeover { nexus_id } => {
+                match schema_status {
+                    // If we don't think the "db_metadata_nexus" tables exist in
+                    // the schema yet, treat them as implicitly having access.
+                    //
+                    // TODO: This may be removed, once we're confident deployed
+                    // systems have upgraded past
+                    // DB_METADATA_NEXUS_SCHEMA_VERSION.
+                    SchemaStatus::OlderThanDesiredSkipAccessCheck => {
+                        NexusAccess::HasImplicitAccess
+                    }
+                    _ => self.check_nexus_access(nexus_id).await?,
+                }
+            }
+            IdentityCheckPolicy::DontCare => {
+                // If a "nexus_id" was not supplied, skip the check, and treat
+                // it as having access.
+                //
+                // This is necessary for tools which access the schema without a
+                // running Nexus, such as the schema-updater binary.
+                NexusAccess::HasImplicitAccess
+            }
+        };
+
+        Ok(ValidatedDatastoreSetupAction {
+            action: DatastoreSetupAction::new(nexus_access, schema_status),
+            desired: desired_version,
+        })
+    }
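Callers are expected to treat the returned action as a small state machine. A hedged sketch of the intended call sequence (this mirrors what `DataStore::new_with_timeout` does later in this diff, minus the backoff loop; error handling is collapsed to `anyhow`):

```rust
async fn drive_setup(
    datastore: &DataStore,
    nexus_id: OmicronZoneUuid,
    config: Option<&AllSchemaVersions>,
) -> anyhow::Result<()> {
    let checked = datastore
        .check_schema_and_access(
            IdentityCheckPolicy::CheckAndTakeover { nexus_id },
            nexus_db_model::SCHEMA_VERSION,
        )
        .await?;
    match checked.action() {
        DatastoreSetupAction::Ready => Ok(()),
        DatastoreSetupAction::NeedsHandoff { nexus_id } => {
            // attempt_handoff returns a retryable 503 while other Nexuses
            // are still active; a real caller loops with backoff.
            Ok(datastore.attempt_handoff(*nexus_id).await?)
        }
        DatastoreSetupAction::Update => {
            datastore.update_schema(checked.clone(), config).await
        }
        DatastoreSetupAction::Refuse => {
            anyhow::bail!("refusing to use this database")
        }
    }
}
```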
+
+    /// Ensures that the database schema matches `desired_version`.
+    ///
+    /// - `validated_action`: A [ValidatedDatastoreSetupAction], indicating that
+    ///   [Self::check_schema_and_access] has already been called.
+    /// - `all_versions`: A description of all schema versions between
+    ///   "whatever is in the DB" and `desired_version`, instructing
+    ///   how to perform an update.
+    pub async fn update_schema(
+        &self,
+        validated_action: ValidatedDatastoreSetupAction,
         all_versions: Option<&AllSchemaVersions>,
     ) -> Result<(), anyhow::Error> {
+        let action = validated_action.action();
+
+        match action {
+            DatastoreSetupAction::Ready => {
+                bail!("No schema update is necessary")
+            }
+            DatastoreSetupAction::Update => (),
+            _ => bail!("Not ready for schema update"),
+        }
+
+        let desired_version = validated_action.desired_version().clone();
         let (found_version, found_target_version) = self
             .database_schema_version()
             .await
             .context("Cannot read database schema version")?;
 
-        let log = log.new(o!(
+        let log = self.log.new(o!(
             "found_version" => found_version.to_string(),
             "desired_version" => desired_version.to_string(),
         ));
@@ -340,6 +665,298 @@ impl DataStore {
         Ok(())
     }
 
+    // Returns the access this Nexus has to the database
+    async fn database_nexus_access(
+        &self,
+        nexus_id: OmicronZoneUuid,
+    ) -> Result<Option<DbMetadataNexus>, Error> {
+        use nexus_db_schema::schema::db_metadata_nexus::dsl;
+
+        let nexus_access: Option<DbMetadataNexus> = dsl::db_metadata_nexus
+            .filter(
+                dsl::nexus_id.eq(nexus_db_model::to_db_typed_uuid(nexus_id)),
+            )
+            .first_async(&*self.pool_connection_unauthorized().await?)
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(nexus_access)
+    }
+
+    // Checks if any db_metadata_nexus records exist in the database
+    async fn database_nexus_access_any_exist(&self) -> Result<bool, Error> {
+        let conn = self.pool_connection_unauthorized().await?;
+        Self::database_nexus_access_any_exist_on_connection(&conn).await
+    }
+
+    // Checks if any db_metadata_nexus records exist in the database using an
+    // existing connection
+    async fn database_nexus_access_any_exist_on_connection(
+        conn: &async_bb8_diesel::Connection<DbConnection>,
+    ) -> Result<bool, Error> {
+        use nexus_db_schema::schema::db_metadata_nexus::dsl;
+
+        let exists: bool = diesel::select(diesel::dsl::exists(
+            dsl::db_metadata_nexus.select(dsl::nexus_id),
+        ))
+        .get_result_async(conn)
+        .await
+        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(exists)
+    }
+
+    /// Deletes the "db_metadata_nexus" record for a Nexus ID, if it exists.
+    pub async fn database_nexus_access_delete(
+        &self,
+        opctx: &OpContext,
+        nexus_id: OmicronZoneUuid,
+    ) -> Result<(), Error> {
+        use nexus_db_schema::schema::db_metadata_nexus::dsl;
+
+        opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;
+        let conn = &*self.pool_connection_authorized(&opctx).await?;
+
+        diesel::delete(
+            dsl::db_metadata_nexus
+                .filter(dsl::nexus_id.eq(nexus_id.into_untyped_uuid())),
+        )
+        .execute_async(conn)
+        .await
+        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(())
+    }
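`database_nexus_access_create`, next, only takes effect if the blueprint it was computed from is still the current target; the check and the insert run in one transaction so the answer cannot go stale. A toy restatement of that gating pattern, with an in-memory stand-in for the database (illustrative names throughout; the real code uses `transaction_if_current_blueprint_is`):

```rust
struct FakeDb {
    current_target: u64, // stand-in for the target blueprint id
    records: Vec<u64>,   // stand-in for db_metadata_nexus rows
}

fn create_records_gated(
    db: &mut FakeDb,
    blueprint_seen: u64,
    new_records: &[u64],
) -> Result<(), &'static str> {
    // Check and mutation happen under one &mut borrow (one serializable
    // transaction in the real code), so the target cannot change between
    // the two steps.
    if db.current_target != blueprint_seen {
        return Err("blueprint is no longer the target; caller is stale");
    }
    db.records.extend_from_slice(new_records);
    Ok(())
}
```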
+
+    /// Propagate the nexus records to the database if and only if
+    /// the blueprint is the current target.
+    ///
+    /// If any of these records already exist, they are unmodified.
+    pub async fn database_nexus_access_create(
+        &self,
+        opctx: &OpContext,
+        blueprint: &nexus_types::deployment::Blueprint,
+    ) -> Result<(), Error> {
+        opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;
+
+        // TODO: Without https://github.com/oxidecomputer/omicron/pull/8863, we
+        // treat all Nexuses as active. Some will become "not_yet", depending on
+        // the Nexus Generation, once it exists.
+        let active_nexus_zones = blueprint
+            .all_omicron_zones(BlueprintZoneDisposition::is_in_service)
+            .filter_map(|(_sled, zone_cfg)| {
+                if zone_cfg.zone_type.is_nexus() {
+                    Some(zone_cfg)
+                } else {
+                    None
+                }
+            });
+        let new_nexuses = active_nexus_zones
+            .map(|z| DbMetadataNexus::new(z.id, DbMetadataNexusState::Active))
+            .collect::<Vec<_>>();
+
+        let conn = &*self.pool_connection_authorized(&opctx).await?;
+        self.transaction_if_current_blueprint_is(
+            &conn,
+            "database_nexus_access_create",
+            opctx,
+            blueprint.id,
+            |conn| {
+                let new_nexuses = new_nexuses.clone();
+                async move {
+                    use nexus_db_schema::schema::db_metadata_nexus::dsl;
+
+                    diesel::insert_into(dsl::db_metadata_nexus)
+                        .values(new_nexuses)
+                        .on_conflict(dsl::nexus_id)
+                        .do_nothing()
+                        .execute_async(conn)
+                        .await?;
+                    Ok(())
+                }
+                .boxed()
+            },
+        )
+        .await
+    }
+
+    // Registers a Nexus instance as having active access to the database
+    #[cfg(test)]
+    async fn database_nexus_access_insert(
+        &self,
+        nexus_id: OmicronZoneUuid,
+        state: DbMetadataNexusState,
+    ) -> Result<(), Error> {
+        use nexus_db_schema::schema::db_metadata_nexus::dsl;
+
+        let new_nexus = DbMetadataNexus::new(nexus_id, state);
+
+        diesel::insert_into(dsl::db_metadata_nexus)
+            .values(new_nexus)
+            .on_conflict(dsl::nexus_id)
+            .do_update()
+            .set(dsl::state.eq(diesel::upsert::excluded(dsl::state)))
+            .execute_async(&*self.pool_connection_unauthorized().await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(())
+    }
+
+    /// Initializes Nexus database access records from a blueprint using an
+    /// existing connection
+    ///
+    /// This function finds all Nexus zones in the given blueprint and creates
+    /// active database access records for them. Used during RSS rack setup.
+    ///
+    /// Returns an error if:
+    /// - Any db_metadata_nexus records already exist (should only be called
+    ///   during initial setup)
+    pub async fn initialize_nexus_access_from_blueprint_on_connection(
+        &self,
+        conn: &async_bb8_diesel::Connection<DbConnection>,
+        nexus_zone_ids: Vec<OmicronZoneUuid>,
+    ) -> Result<(), Error> {
+        use nexus_db_schema::schema::db_metadata_nexus::dsl;
+
+        // Ensure no db_metadata_nexus records already exist
+        let any_records_exist =
+            Self::database_nexus_access_any_exist_on_connection(conn).await?;
+        if any_records_exist {
+            return Err(Error::conflict(
+                "Cannot initialize Nexus access from blueprint: \
+                db_metadata_nexus records already exist. This function should \
+                only be called during initial rack setup.",
+            ));
+        }
+
+        // Create db_metadata_nexus records for all Nexus zones
+        let new_nexuses: Vec<DbMetadataNexus> = nexus_zone_ids
+            .iter()
+            .map(|&nexus_id| {
+                DbMetadataNexus::new(nexus_id, DbMetadataNexusState::Active)
+            })
+            .collect();
+
+        diesel::insert_into(dsl::db_metadata_nexus)
+            .values(new_nexuses)
+            .execute_async(conn)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(())
+    }
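The handoff transaction below enforces a single invariant before flipping anyone to `active`. As a hedged restatement, the precondition it checks is equivalent to this pure function over the table's contents:

```rust
// Equivalent restatement of the checks inside attempt_handoff_impl:
// every record must be quiesced or not_yet, and the candidate itself
// must be present and not_yet.
fn handoff_allowed(
    records: &[(OmicronZoneUuid, DbMetadataNexusState)],
    candidate: OmicronZoneUuid,
) -> bool {
    use DbMetadataNexusState::*;
    let all_inactive =
        records.iter().all(|(_, s)| matches!(*s, Quiesced | NotYet));
    let candidate_ready =
        records.iter().any(|(id, s)| *id == candidate && *s == NotYet);
    all_inactive && candidate_ready
}
```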
+
+    // Implementation function for attempt_handoff that runs within a
+    // transaction
+    //
+    // This function must be executed from a transaction context to be safe.
+    async fn attempt_handoff_impl(
+        conn: async_bb8_diesel::Connection<DbConnection>,
+        nexus_id: OmicronZoneUuid,
+        err: OptionalError<HandoffError>,
+    ) -> Result<(), diesel::result::Error> {
+        use nexus_db_schema::schema::db_metadata_nexus::dsl;
+
+        // Before proceeding, all records must be in the "quiesced" or
+        // "not_yet" states.
+        //
+        // We explicitly look for any records violating this, rather than
+        // explicitly looking for "active" records, so as to protect ourselves
+        // from future states being added over time.
+        //
+        // There is no concern of time-of-check-to-time-of-use bugs because
+        // this function must be executed within a transaction.
+        let active_count: nexus_db_model::SqlU32 = dsl::db_metadata_nexus
+            .filter(
+                dsl::state
+                    .ne(DbMetadataNexusState::Quiesced)
+                    .and(dsl::state.ne(DbMetadataNexusState::NotYet)),
+            )
+            .count()
+            .get_result_async(&conn)
+            .await?;
+        let active_count: u32 = active_count.0;
+        if active_count > 0 {
+            return Err(err.bail(HandoffError::ActiveNexusInstancesExist {
+                active_count,
+            }));
+        }
+
+        // Check that our nexus has a "not_yet" record
+        //
+        // Only read the "state" field to avoid reading the rest of the struct,
+        // in case additional columns are added over time.
+        let our_nexus_state: Option<DbMetadataNexusState> =
+            dsl::db_metadata_nexus
+                .filter(
+                    dsl::nexus_id
+                        .eq(nexus_db_model::to_db_typed_uuid(nexus_id)),
+                )
+                .select(dsl::state)
+                .get_result_async(&conn)
+                .await
+                .optional()?;
+        let Some(our_state) = our_nexus_state else {
+            return Err(err.bail(HandoffError::NexusNotRegistered { nexus_id }));
+        };
+        if our_state != DbMetadataNexusState::NotYet {
+            return Err(err.bail(HandoffError::NexusInWrongState {
+                nexus_id,
+                current_state: our_state,
+            }));
+        }
+
+        // Update all "not_yet" records to "active"
+        diesel::update(dsl::db_metadata_nexus)
+            .filter(dsl::state.eq(DbMetadataNexusState::NotYet))
+            .set(dsl::state.eq(DbMetadataNexusState::Active))
+            .execute_async(&conn)
+            .await?;
+
+        Ok(())
+    }
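One subtlety above: inside the retry wrapper the closure may only return `diesel::result::Error`, so domain failures are smuggled out through `OptionalError::bail` and recovered with `take` afterwards (visible in `attempt_handoff` below). A toy model of that two-channel pattern, with `&'static str` standing in for the diesel error:

```rust
use std::cell::Cell;

// `side` plays the role of OptionalError: a slot the transaction body can
// fill with a domain error before returning the only error type the
// wrapper accepts.
fn run_txn(
    side: &Cell<Option<&'static str>>,
    ok: bool,
) -> Result<(), &'static str> {
    if !ok {
        side.set(Some("nexus is in the wrong state")); // domain error
        return Err("rollback"); // what the wrapper sees
    }
    Ok(())
}

fn caller() -> Result<(), String> {
    let side = Cell::new(None);
    run_txn(&side, false).map_err(|db_err| match side.take() {
        Some(domain) => format!("handoff failed: {domain}"), // bail()ed
        None => format!("database error: {db_err}"),         // real DB error
    })
}
```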
+
+    /// Attempts to perform a handoff to activate this Nexus for database
+    /// access.
+    ///
+    /// This function checks that:
+    /// 1. ALL records in db_metadata_nexus are in "not_yet" or "quiesced"
+    ///    states
+    /// 2. The specified nexus_id has a record which is "not_yet"
+    ///
+    /// If both conditions are met, it updates ALL "not_yet" records to
+    /// "active". These operations are performed transactionally.
+    ///
+    /// Returns an error if:
+    /// - Any record is in "active" state
+    /// - The specified nexus_id doesn't have a "not_yet" record
+    /// - Database transaction fails
+    pub async fn attempt_handoff(
+        &self,
+        nexus_id: OmicronZoneUuid,
+    ) -> Result<(), Error> {
+        let err = OptionalError::new();
+        let conn = self.pool_connection_unauthorized().await?;
+
+        self.transaction_retry_wrapper("attempt_handoff")
+            .transaction(&conn, |conn| {
+                let err = err.clone();
+                async move {
+                    Self::attempt_handoff_impl(conn, nexus_id, err).await
+                }
+            })
+            .await
+            .map_err(|e| {
+                if let Some(err) = err.take() {
+                    err.into()
+                } else {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                }
+            })
+    }
+
     pub async fn database_schema_version(
         &self,
     ) -> Result<(Version, Option<Version>), Error> {
@@ -494,24 +1111,71 @@ impl DataStore {
 #[cfg(test)]
 mod test {
     use super::*;
+    use crate::db::datastore::IdentityCheckPolicy;
     use crate::db::pub_test_utils::TestDatabase;
     use camino::Utf8Path;
     use camino_tempfile::Utf8TempDir;
+    use id_map::IdMap;
     use nexus_db_model::SCHEMA_VERSION;
+    use nexus_inventory::now_db_precision;
+    use nexus_types::deployment::Blueprint;
+    use nexus_types::deployment::BlueprintHostPhase2DesiredSlots;
+    use nexus_types::deployment::BlueprintSledConfig;
+    use nexus_types::deployment::BlueprintTarget;
+    use nexus_types::deployment::BlueprintZoneConfig;
+    use nexus_types::deployment::BlueprintZoneDisposition;
+    use nexus_types::deployment::BlueprintZoneImageSource;
+    use nexus_types::deployment::BlueprintZoneType;
+    use nexus_types::deployment::CockroachDbPreserveDowngrade;
+    use nexus_types::deployment::OximeterReadMode;
+    use nexus_types::deployment::PendingMgsUpdates;
+    use nexus_types::deployment::PlanningReport;
+    use nexus_types::deployment::blueprint_zone_type;
+    use nexus_types::external_api::views::SledState;
+    use nexus_types::inventory::NetworkInterface;
+    use nexus_types::inventory::NetworkInterfaceKind;
+    use omicron_common::api::external::Generation;
+    use omicron_common::api::external::MacAddr;
+    use omicron_common::api::external::Vni;
+    use omicron_common::zpool_name::ZpoolName;
     use omicron_test_utils::dev;
+    use omicron_uuid_kinds::BlueprintUuid;
+    use omicron_uuid_kinds::ExternalIpUuid;
+    use omicron_uuid_kinds::SledUuid;
+    use omicron_uuid_kinds::ZpoolUuid;
+    use std::collections::BTreeMap;
 
     // Confirms that calling the internal "ensure_schema" function can succeed
     // when the database is already at that version.
#[tokio::test] - async fn ensure_schema_is_current_version() { - let logctx = dev::test_setup_log("ensure_schema_is_current_version"); + async fn check_schema_is_current_version() { + let logctx = dev::test_setup_log("check_schema_is_current_version"); let db = TestDatabase::new_with_raw_datastore(&logctx.log).await; let datastore = db.datastore(); - datastore - .ensure_schema(&logctx.log, SCHEMA_VERSION, None) + let checked_action = datastore + .check_schema_and_access( + IdentityCheckPolicy::DontCare, + SCHEMA_VERSION, + ) .await - .expect("Failed to ensure schema"); + .expect("Failed to check schema and access"); + + assert!( + matches!(checked_action.action(), DatastoreSetupAction::Ready), + "Unexpected action: {:?}", + checked_action.action(), + ); + assert_eq!( + checked_action.desired_version(), + &SCHEMA_VERSION, + "Unexpected desired version: {}", + checked_action.desired_version() + ); + + datastore.update_schema(checked_action, None).await.expect_err( + "Should not be able to update schema that's already up-to-date", + ); db.terminate().await; logctx.cleanup_successful(); @@ -614,8 +1278,13 @@ mod test { let log = log.clone(); let pool = pool.clone(); tokio::task::spawn(async move { - let datastore = - DataStore::new(&log, pool, Some(&all_versions)).await?; + let datastore = DataStore::new( + &log, + pool, + Some(&all_versions), + IdentityCheckPolicy::DontCare, + ) + .await?; // This is the crux of this test: confirm that, as each // migration completes, it's not possible to see any artifacts @@ -742,9 +1411,23 @@ mod test { // Manually construct the datastore to avoid the backoff timeout. // We want to trigger errors, but have no need to wait. + let datastore = DataStore::new_unchecked(log.clone(), pool.clone()); + let checked_action = datastore + .check_schema_and_access( + IdentityCheckPolicy::DontCare, + SCHEMA_VERSION, + ) + .await + .expect("Failed to check schema and access"); + + // This needs to be in a loop because we constructed a schema change + // that will intentionally fail sometimes when doing this work. + // + // This isn't a normal behavior! But we're trying to test the + // intermediate steps of a schema change here. 
while let Err(e) = datastore - .ensure_schema(&log, SCHEMA_VERSION, Some(&all_versions)) + .update_schema(checked_action.clone(), Some(&all_versions)) .await { warn!(log, "Failed to ensure schema"; "err" => %e); @@ -771,4 +1454,991 @@ mod test { db.terminate().await; logctx.cleanup_successful(); } + + #[tokio::test] + async fn test_attempt_handoff_with_active_records() { + let logctx = + dev::test_setup_log("test_attempt_handoff_with_active_records"); + let db = TestDatabase::new_with_pool(&logctx.log).await; + let datastore = + DataStore::new_unchecked(logctx.log.clone(), db.pool().clone()); + + // Set up test data: create some nexus records, including one active + let nexus1_id = OmicronZoneUuid::new_v4(); + let nexus2_id = OmicronZoneUuid::new_v4(); + let nexus3_id = OmicronZoneUuid::new_v4(); + + // Insert records: one active, one not_yet, one quiesced + datastore + .database_nexus_access_insert( + nexus1_id, + DbMetadataNexusState::Active, + ) + .await + .expect("Failed to insert active nexus"); + datastore + .database_nexus_access_insert( + nexus2_id, + DbMetadataNexusState::NotYet, + ) + .await + .expect("Failed to insert not_yet nexus"); + datastore + .database_nexus_access_insert( + nexus3_id, + DbMetadataNexusState::Quiesced, + ) + .await + .expect("Failed to insert quiesced nexus"); + + // Attempt handoff with nexus2 - should fail because nexus1 is active + let result = datastore.attempt_handoff(nexus2_id).await; + assert!(result.is_err()); + let error_msg = format!("{}", result.unwrap_err()); + assert!( + error_msg.contains("1 Nexus instance(s) are still active"), + "Expected error about active instances, got: {}", + error_msg + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_attempt_handoff_nexus_not_registered() { + let logctx = + dev::test_setup_log("test_attempt_handoff_nexus_not_registered"); + let db = TestDatabase::new_with_pool(&logctx.log).await; + let datastore = + DataStore::new_unchecked(logctx.log.clone(), db.pool().clone()); + + // Set up test data: create some other nexus records but not the one we're trying to handoff + let nexus1_id = OmicronZoneUuid::new_v4(); + let nexus2_id = OmicronZoneUuid::new_v4(); + let unregistered_nexus_id = OmicronZoneUuid::new_v4(); + + datastore + .database_nexus_access_insert( + nexus1_id, + DbMetadataNexusState::NotYet, + ) + .await + .expect("Failed to insert nexus1"); + datastore + .database_nexus_access_insert( + nexus2_id, + DbMetadataNexusState::Quiesced, + ) + .await + .expect("Failed to insert nexus2"); + + // Attempt handoff with unregistered nexus - should fail + let result = datastore.attempt_handoff(unregistered_nexus_id).await; + assert!(result.is_err()); + let error_msg = format!("{}", result.unwrap_err()); + assert!( + error_msg + .contains("does not have a record in db_metadata_nexus table"), + "Expected error about unregistered nexus, got: {}", + error_msg + ); + assert!( + error_msg.contains(&unregistered_nexus_id.to_string()), + "Expected error to contain nexus ID {}, got: {}", + unregistered_nexus_id, + error_msg + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_attempt_handoff_nexus_wrong_state() { + let logctx = + dev::test_setup_log("test_attempt_handoff_nexus_wrong_state"); + let db = TestDatabase::new_with_pool(&logctx.log).await; + let datastore = + DataStore::new_unchecked(logctx.log.clone(), db.pool().clone()); + + // Set up test data: create nexus records where our target is in wrong state + let 
nexus1_id = OmicronZoneUuid::new_v4(); + let nexus2_id = OmicronZoneUuid::new_v4(); + let quiesced_nexus_id = OmicronZoneUuid::new_v4(); + + datastore + .database_nexus_access_insert( + nexus1_id, + DbMetadataNexusState::NotYet, + ) + .await + .expect("Failed to insert nexus1"); + datastore + .database_nexus_access_insert( + nexus2_id, + DbMetadataNexusState::NotYet, + ) + .await + .expect("Failed to insert nexus2"); + datastore + .database_nexus_access_insert( + quiesced_nexus_id, + DbMetadataNexusState::Quiesced, + ) + .await + .expect("Failed to insert quiesced nexus"); + + // Attempt handoff with quiesced nexus - should fail + let result = datastore.attempt_handoff(quiesced_nexus_id).await; + assert!(result.is_err()); + let error_msg = format!("{}", result.unwrap_err()); + assert!( + error_msg.contains("is in state Quiesced"), + "Expected error about wrong state, got: {}", + error_msg + ); + assert!( + error_msg.contains("Must be in 'not_yet' state to become active"), + "Expected error to mention required state, got: {}", + error_msg + ); + assert!( + error_msg.contains(&quiesced_nexus_id.to_string()), + "Expected error to contain nexus ID {}, got: {}", + quiesced_nexus_id, + error_msg + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_attempt_handoff_success() { + let logctx = dev::test_setup_log("test_attempt_handoff_success"); + let db = TestDatabase::new_with_pool(&logctx.log).await; + let datastore = + DataStore::new_unchecked(logctx.log.clone(), db.pool().clone()); + + // Set up test data: create multiple nexus records in not_yet and + // quiesced states + let nexus1_id = OmicronZoneUuid::new_v4(); + let nexus2_id = OmicronZoneUuid::new_v4(); + let nexus3_id = OmicronZoneUuid::new_v4(); + + datastore + .database_nexus_access_insert( + nexus1_id, + DbMetadataNexusState::NotYet, + ) + .await + .expect("Failed to insert nexus1"); + datastore + .database_nexus_access_insert( + nexus2_id, + DbMetadataNexusState::NotYet, + ) + .await + .expect("Failed to insert nexus2"); + datastore + .database_nexus_access_insert( + nexus3_id, + DbMetadataNexusState::Quiesced, + ) + .await + .expect("Failed to insert nexus3"); + + // Verify initial state: all not_yet or quiesced + let nexus1_before = datastore + .database_nexus_access(nexus1_id) + .await + .expect("Failed to get nexus1") + .expect("nexus1 should exist"); + let nexus2_before = datastore + .database_nexus_access(nexus2_id) + .await + .expect("Failed to get nexus2") + .expect("nexus2 should exist"); + let nexus3_before = datastore + .database_nexus_access(nexus3_id) + .await + .expect("Failed to get nexus3") + .expect("nexus3 should exist"); + + assert_eq!(nexus1_before.state(), DbMetadataNexusState::NotYet); + assert_eq!(nexus2_before.state(), DbMetadataNexusState::NotYet); + assert_eq!(nexus3_before.state(), DbMetadataNexusState::Quiesced); + + // Attempt handoff with nexus2 - should succeed + let result = datastore.attempt_handoff(nexus2_id).await; + if let Err(ref e) = result { + panic!("Handoff should succeed but got error: {}", e); + } + assert!(result.is_ok()); + + // Verify final state: all not_yet records should now be active, + // quiesced should remain quiesced + let nexus1_after = datastore + .database_nexus_access(nexus1_id) + .await + .expect("Failed to get nexus1") + .expect("nexus1 should exist"); + let nexus2_after = datastore + .database_nexus_access(nexus2_id) + .await + .expect("Failed to get nexus2") + .expect("nexus2 should exist"); + let nexus3_after = datastore + 
.database_nexus_access(nexus3_id) + .await + .expect("Failed to get nexus3") + .expect("nexus3 should exist"); + + assert_eq!(nexus1_after.state(), DbMetadataNexusState::Active); + assert_eq!(nexus2_after.state(), DbMetadataNexusState::Active); + // Should remain unchanged + assert_eq!(nexus3_after.state(), DbMetadataNexusState::Quiesced); + + db.terminate().await; + logctx.cleanup_successful(); + } + + // This test covers two cases: + // + // 1. New systems: We use RSS to initialize Nexus, but no db_metadata_nexus + // entries exist. + // 2. Deployed systems: We have a deployed system which updates to have this + // "db_metadata_nexus"-handling code, but has no rows in that table. + // + // Both of these cases must be granted database access to self-populate + // later. + #[tokio::test] + async fn test_check_schema_and_access_empty_table_permits_access() { + let logctx = dev::test_setup_log( + "test_check_schema_and_access_empty_table_permits_access", + ); + let db = TestDatabase::new_with_pool(&logctx.log).await; + let datastore = + DataStore::new_unchecked(logctx.log.clone(), db.pool().clone()); + + let nexus_id = OmicronZoneUuid::new_v4(); + + // With an empty table, even explicit nexus ID should get access + let action = datastore + .check_schema_and_access( + IdentityCheckPolicy::CheckAndTakeover { nexus_id }, + SCHEMA_VERSION, + ) + .await + .expect("Failed to check schema and access"); + assert_eq!(action.action(), &DatastoreSetupAction::Ready); + + // Add a record to the table, now explicit nexus ID should NOT get + // access + datastore + .database_nexus_access_insert( + OmicronZoneUuid::new_v4(), // Different nexus + DbMetadataNexusState::Active, + ) + .await + .expect("Failed to insert nexus record"); + + let action = datastore + .check_schema_and_access( + IdentityCheckPolicy::CheckAndTakeover { nexus_id }, + SCHEMA_VERSION, + ) + .await + .expect("Failed to check schema and access"); + assert_eq!( + action.action(), + &DatastoreSetupAction::NeedsHandoff { nexus_id } + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Validates the case where a Nexus ID is explicitly requested or omitted. + // + // The omission case is important for the "schema-updater" binary to keep working. + #[tokio::test] + async fn test_check_schema_and_access_nexus_id() { + let logctx = + dev::test_setup_log("test_check_schema_and_access_nexus_id"); + let db = TestDatabase::new_with_pool(&logctx.log).await; + let datastore = + DataStore::new_unchecked(logctx.log.clone(), db.pool().clone()); + + // Add an active record, for some Nexus ID. 
+ datastore + .database_nexus_access_insert( + OmicronZoneUuid::new_v4(), + DbMetadataNexusState::Active, + ) + .await + .expect("Failed to insert nexus record"); + + // Using 'DontCare' as a nexus ID should get access (schema updater case) + let action = datastore + .check_schema_and_access( + IdentityCheckPolicy::DontCare, + SCHEMA_VERSION, + ) + .await + .expect("Failed to check schema and access"); + assert_eq!(action.action(), &DatastoreSetupAction::Ready); + + // Explicit CheckAndTakeover with a Nexus ID that doesn't exist should + // not get access + let nexus_id = OmicronZoneUuid::new_v4(); + let action = datastore + .check_schema_and_access( + IdentityCheckPolicy::CheckAndTakeover { nexus_id }, + SCHEMA_VERSION, + ) + .await + .expect("Failed to check schema and access"); + assert_eq!( + action.action(), + &DatastoreSetupAction::NeedsHandoff { nexus_id }, + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Validates that an explicit db_metadata_nexus record can lock-out Nexuses which should not be + // able to access the database. + #[tokio::test] + async fn test_check_schema_and_access_lockout_refuses_access() { + let logctx = dev::test_setup_log( + "test_check_schema_and_access_lockout_refuses_access", + ); + let db = TestDatabase::new_with_pool(&logctx.log).await; + let datastore = + DataStore::new_unchecked(logctx.log.clone(), db.pool().clone()); + + let nexus_id = OmicronZoneUuid::new_v4(); + + // Insert our nexus as quiesced (locked out) + datastore + .database_nexus_access_insert( + nexus_id, + DbMetadataNexusState::Quiesced, + ) + .await + .expect("Failed to insert nexus record"); + + // Should refuse access + let action = datastore + .check_schema_and_access( + IdentityCheckPolicy::CheckAndTakeover { nexus_id }, + SCHEMA_VERSION, + ) + .await + .expect("Failed to check schema and access"); + assert_eq!(action.action(), &DatastoreSetupAction::Refuse); + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Validates that if a Nexus with an "old desired schema" boots, it cannot access the + // database under any conditions. + // + // This is the case where the database has upgraded beyond what Nexus can understand. + // + // In practice, the db_metadata_nexus records should prevent this situation from occurring, + // but it's still a useful property to reject old schemas while the "schema-updater" binary + // exists. 
+ #[tokio::test] + async fn test_check_schema_and_access_schema_too_new() { + let logctx = + dev::test_setup_log("test_check_schema_and_access_schema_too_new"); + let db = TestDatabase::new_with_pool(&logctx.log).await; + let datastore = + DataStore::new_unchecked(logctx.log.clone(), db.pool().clone()); + + let nexus_id = OmicronZoneUuid::new_v4(); + + // Insert our nexus as active + datastore + .database_nexus_access_insert( + nexus_id, + DbMetadataNexusState::Active, + ) + .await + .expect("Failed to insert nexus record"); + + // Try to access with an older version than what's in the database + let older_version = Version::new(SCHEMA_VERSION.major - 1, 0, 0); + + // Explicit Nexus ID: Rejected + let action = datastore + .check_schema_and_access( + IdentityCheckPolicy::CheckAndTakeover { nexus_id }, + older_version.clone(), + ) + .await + .expect("Failed to check schema and access"); + assert_eq!(action.action(), &DatastoreSetupAction::Refuse); + + // Implicit Access: Rejected + let action = datastore + .check_schema_and_access( + IdentityCheckPolicy::DontCare, + older_version.clone(), + ) + .await + .expect("Failed to check schema and access"); + assert_eq!(action.action(), &DatastoreSetupAction::Refuse); + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Validates that the schema + access combinations identify we should wait for handoff + // when we have a "NotYet" record that could become compatible with the database. + #[tokio::test] + async fn test_check_schema_and_access_wait_for_handoff() { + let logctx = dev::test_setup_log( + "test_check_schema_and_access_wait_for_handoff", + ); + let db = TestDatabase::new_with_pool(&logctx.log).await; + let datastore = + DataStore::new_unchecked(logctx.log.clone(), db.pool().clone()); + + let nexus_id = OmicronZoneUuid::new_v4(); + + // Insert our nexus as not_yet (doesn't have access yet) + datastore + .database_nexus_access_insert( + nexus_id, + DbMetadataNexusState::NotYet, + ) + .await + .expect("Failed to insert nexus record"); + + // We should wait for handoff if the versions match, or if our desired + // version is newer than what exists in the database. + let current_version = SCHEMA_VERSION; + let newer_version = Version::new(SCHEMA_VERSION.major + 1, 0, 0); + let versions = [current_version, newer_version]; + + for version in &versions { + // Should wait for handoff when schema is up-to-date + let action = datastore + .check_schema_and_access( + IdentityCheckPolicy::CheckAndTakeover { nexus_id }, + version.clone(), + ) + .await + .expect("Failed to check schema and access"); + assert_eq!( + action.action(), + &DatastoreSetupAction::NeedsHandoff { nexus_id }, + ); + } + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Validates the "normal case", where a Nexus has access and the schema already matches. 
+    #[tokio::test]
+    async fn test_check_schema_and_access_normal_use() {
+        let logctx =
+            dev::test_setup_log("test_check_schema_and_access_normal_use");
+        let db = TestDatabase::new_with_pool(&logctx.log).await;
+        let datastore =
+            DataStore::new_unchecked(logctx.log.clone(), db.pool().clone());
+
+        let nexus_id = OmicronZoneUuid::new_v4();
+
+        // Insert our nexus as active
+        datastore
+            .database_nexus_access_insert(
+                nexus_id,
+                DbMetadataNexusState::Active,
+            )
+            .await
+            .expect("Failed to insert nexus record");
+
+        // With current schema version, should be ready for normal use
+        let action = datastore
+            .check_schema_and_access(
+                IdentityCheckPolicy::CheckAndTakeover { nexus_id },
+                SCHEMA_VERSION,
+            )
+            .await
+            .expect("Failed to check schema and access");
+
+        assert_eq!(action.action(), &DatastoreSetupAction::Ready);
+        assert_eq!(action.desired_version(), &SCHEMA_VERSION);
+
+        db.terminate().await;
+        logctx.cleanup_successful();
+    }
+
+    // Validates that when a Nexus is active with a newer-than-database desired
+    // version, it will request an update
+    #[tokio::test]
+    async fn test_check_schema_and_access_update_now() {
+        let logctx =
+            dev::test_setup_log("test_check_schema_and_access_update_now");
+        let db = TestDatabase::new_with_pool(&logctx.log).await;
+        let datastore =
+            DataStore::new_unchecked(logctx.log.clone(), db.pool().clone());
+
+        let nexus_id = OmicronZoneUuid::new_v4();
+
+        // Insert our nexus as active
+        datastore
+            .database_nexus_access_insert(
+                nexus_id,
+                DbMetadataNexusState::Active,
+            )
+            .await
+            .expect("Failed to insert nexus record");
+
+        let newer_version = Version::new(SCHEMA_VERSION.major + 1, 0, 0);
+
+        // With a newer desired version, should request update
+        let action = datastore
+            .check_schema_and_access(
+                IdentityCheckPolicy::CheckAndTakeover { nexus_id },
+                newer_version.clone(),
+            )
+            .await
+            .expect("Failed to check schema and access");
+
+        assert_eq!(action.action(), &DatastoreSetupAction::Update);
+        assert_eq!(action.desired_version(), &newer_version);
+
+        db.terminate().await;
+        logctx.cleanup_successful();
+    }
+
+    fn create_test_blueprint(
+        nexus_zones: Vec<(OmicronZoneUuid, BlueprintZoneDisposition)>,
+    ) -> Blueprint {
+        let blueprint_id = BlueprintUuid::new_v4();
+        let sled_id = SledUuid::new_v4();
+
+        let zones: IdMap<BlueprintZoneConfig> = nexus_zones
+            .into_iter()
+            .map(|(zone_id, disposition)| BlueprintZoneConfig {
+                disposition,
+                id: zone_id,
+                filesystem_pool: ZpoolName::new_external(ZpoolUuid::new_v4()),
+                zone_type: BlueprintZoneType::Nexus(blueprint_zone_type::Nexus {
+                    internal_address: "[::1]:0".parse().unwrap(),
+                    external_dns_servers: Vec::new(),
+                    external_ip: nexus_types::deployment::OmicronZoneExternalFloatingIp {
+                        id: ExternalIpUuid::new_v4(),
+                        ip: std::net::IpAddr::V6(std::net::Ipv6Addr::LOCALHOST),
+                    },
+                    external_tls: true,
+                    nic: NetworkInterface {
+                        id: uuid::Uuid::new_v4(),
+                        kind: NetworkInterfaceKind::Service {
+                            id: zone_id.into_untyped_uuid(),
+                        },
+                        name: "test-nic".parse().unwrap(),
+                        ip: "192.168.1.1".parse().unwrap(),
+                        mac: MacAddr::random_system(),
+                        subnet: ipnetwork::IpNetwork::V4(
+                            "192.168.1.0/24".parse().unwrap()
+                        ).into(),
+                        vni: Vni::try_from(100).unwrap(),
+                        primary: true,
+                        slot: 0,
+                        transit_ips: Vec::new(),
+                    },
+                }),
+                image_source: BlueprintZoneImageSource::InstallDataset,
+            })
+            .collect();
+
+        let mut sleds = BTreeMap::new();
+        sleds.insert(
+            sled_id,
+            BlueprintSledConfig {
+                state: SledState::Active,
+                sled_agent_generation: Generation::new(),
+                zones,
+                disks: IdMap::new(),
+                datasets: 
IdMap::new(), + remove_mupdate_override: None, + host_phase_2: BlueprintHostPhase2DesiredSlots::current_contents( + ), + }, + ); + + Blueprint { + id: blueprint_id, + sleds, + pending_mgs_updates: PendingMgsUpdates::new(), + parent_blueprint_id: None, + internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), + target_release_minimum_generation: Generation::new(), + cockroachdb_fingerprint: String::new(), + cockroachdb_setting_preserve_downgrade: + CockroachDbPreserveDowngrade::DoNotModify, + clickhouse_cluster_config: None, + oximeter_read_mode: OximeterReadMode::SingleNode, + oximeter_read_version: Generation::new(), + time_created: now_db_precision(), + creator: "test suite".to_string(), + comment: "test blueprint".to_string(), + report: PlanningReport::new(blueprint_id), + } + } + + #[tokio::test] + async fn test_database_nexus_access_create() { + let logctx = dev::test_setup_log("test_database_nexus_access_create"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let datastore = db.datastore(); + let opctx = db.opctx(); + + // Create a blueprint with two in-service Nexus zones, + // and one expunged Nexus. + let nexus1_id = OmicronZoneUuid::new_v4(); + let nexus2_id = OmicronZoneUuid::new_v4(); + let expunged_nexus = OmicronZoneUuid::new_v4(); + let blueprint = create_test_blueprint(vec![ + (nexus1_id, BlueprintZoneDisposition::InService), + (nexus2_id, BlueprintZoneDisposition::InService), + ( + expunged_nexus, + BlueprintZoneDisposition::Expunged { + as_of_generation: Generation::new(), + ready_for_cleanup: true, + }, + ), + ]); + + // Insert the blueprint and make it the target + datastore + .blueprint_insert(&opctx, &blueprint) + .await + .expect("Failed to insert blueprint"); + datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: blueprint.id, + enabled: false, + time_made_target: chrono::Utc::now(), + }, + ) + .await + .expect("Failed to set blueprint target"); + + // Create nexus access records + datastore + .database_nexus_access_create(&opctx, &blueprint) + .await + .expect("Failed to create nexus access"); + + // Verify records were created with Active state + let nexus1_access = datastore + .database_nexus_access(nexus1_id) + .await + .expect("Failed to get nexus1 access"); + let nexus2_access = datastore + .database_nexus_access(nexus2_id) + .await + .expect("Failed to get nexus2 access"); + let expunged_access = datastore + .database_nexus_access(expunged_nexus) + .await + .expect("Failed to get expunged access"); + + assert!(nexus1_access.is_some(), "nexus1 should have access record"); + assert!(nexus2_access.is_some(), "nexus2 should have access record"); + assert!( + expunged_access.is_none(), + "expunged nexus should not have access record" + ); + + let nexus1_record = nexus1_access.unwrap(); + let nexus2_record = nexus2_access.unwrap(); + assert_eq!(nexus1_record.state(), DbMetadataNexusState::Active); + assert_eq!(nexus2_record.state(), DbMetadataNexusState::Active); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_database_nexus_access_create_idempotent() { + let logctx = + dev::test_setup_log("test_database_nexus_access_create_idempotent"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let datastore = db.datastore(); + let opctx = db.opctx(); + + // Create a blueprint with one Nexus zone + let nexus_id = OmicronZoneUuid::new_v4(); + let blueprint = create_test_blueprint(vec![( + nexus_id, + 
BlueprintZoneDisposition::InService, + )]); + + // Insert the blueprint and make it the target + datastore + .blueprint_insert(&opctx, &blueprint) + .await + .expect("Failed to insert blueprint"); + datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: blueprint.id, + enabled: false, + time_made_target: chrono::Utc::now(), + }, + ) + .await + .expect("Failed to set blueprint target"); + + // Create nexus access records (first time) + datastore + .database_nexus_access_create(&opctx, &blueprint) + .await + .expect("Failed to create nexus access (first time)"); + + // Verify record was created + async fn confirm_state( + datastore: &DataStore, + nexus_id: OmicronZoneUuid, + expected_state: DbMetadataNexusState, + ) { + let state = datastore + .database_nexus_access(nexus_id) + .await + .expect("Failed to get nexus access after first create") + .expect("Entry for Nexus should have been inserted"); + assert_eq!(state.state(), expected_state); + } + + confirm_state(datastore, nexus_id, DbMetadataNexusState::Active).await; + + // Creating the record again: not an error. + datastore + .database_nexus_access_create(&opctx, &blueprint) + .await + .expect("Failed to create nexus access (first time)"); + confirm_state(datastore, nexus_id, DbMetadataNexusState::Active).await; + + // Manually make the record "Quiesced". + use nexus_db_schema::schema::db_metadata_nexus::dsl; + diesel::update(dsl::db_metadata_nexus) + .filter(dsl::nexus_id.eq(nexus_id.into_untyped_uuid())) + .set(dsl::state.eq(DbMetadataNexusState::Quiesced)) + .execute_async( + &*datastore.pool_connection_unauthorized().await.unwrap(), + ) + .await + .expect("Failed to update record"); + confirm_state(datastore, nexus_id, DbMetadataNexusState::Quiesced) + .await; + + // Create nexus access records another time - should be idempotent, + // but should be "on-conflict, ignore". + datastore + .database_nexus_access_create(&opctx, &blueprint) + .await + .expect("Failed to create nexus access (second time)"); + confirm_state(datastore, nexus_id, DbMetadataNexusState::Quiesced) + .await; + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_database_nexus_access_create_fails_wrong_target_blueprint() { + let logctx = dev::test_setup_log( + "test_database_nexus_access_create_fails_wrong_target_blueprint", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let datastore = db.datastore(); + let opctx = db.opctx(); + + // Create two different blueprints + let nexus_id = OmicronZoneUuid::new_v4(); + let target_blueprint = create_test_blueprint(vec![( + nexus_id, + BlueprintZoneDisposition::InService, + )]); + let non_target_blueprint = create_test_blueprint(vec![( + nexus_id, + BlueprintZoneDisposition::InService, + )]); + + // Insert both blueprints + datastore + .blueprint_insert(&opctx, &target_blueprint) + .await + .expect("Failed to insert target blueprint"); + datastore + .blueprint_insert(&opctx, &non_target_blueprint) + .await + .expect("Failed to insert non-target blueprint"); + + // Set the first blueprint as the target + datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: target_blueprint.id, + enabled: false, + time_made_target: chrono::Utc::now(), + }, + ) + .await + .expect("Failed to set target blueprint"); + + // Try to create nexus access records using the non-target blueprint. 
+ // This should fail because the transaction should check if the + // blueprint is the current target + let result = datastore + .database_nexus_access_create(&opctx, &non_target_blueprint) + .await; + assert!( + result.is_err(), + "Creating nexus access with wrong target blueprint should fail" + ); + + // Verify no records were created for the nexus + let access = datastore + .database_nexus_access(nexus_id) + .await + .expect("Failed to get nexus access"); + assert!( + access.is_none(), + "No access record should exist when wrong blueprint is used" + ); + + // Verify that using the correct target blueprint works + datastore + .database_nexus_access_create(&opctx, &target_blueprint) + .await + .expect( + "Creating nexus access with correct blueprint should succeed", + ); + + let access_after_correct = datastore + .database_nexus_access(nexus_id) + .await + .expect("Failed to get nexus access after correct blueprint"); + assert!( + access_after_correct.is_some(), + "Access record should exist after using correct target blueprint" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_database_nexus_access_delete() { + let logctx = dev::test_setup_log("test_database_nexus_access_delete"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let datastore = db.datastore(); + let opctx = db.opctx(); + + // Create test nexus IDs + let nexus1_id = OmicronZoneUuid::new_v4(); + let nexus2_id = OmicronZoneUuid::new_v4(); + + // Insert records directly using the test method + datastore + .database_nexus_access_insert( + nexus1_id, + DbMetadataNexusState::Active, + ) + .await + .expect("Failed to insert nexus1 access"); + datastore + .database_nexus_access_insert( + nexus2_id, + DbMetadataNexusState::NotYet, + ) + .await + .expect("Failed to insert nexus2 access"); + + // Verify records were created + let nexus1_before = datastore + .database_nexus_access(nexus1_id) + .await + .expect("Failed to get nexus1 access"); + let nexus2_before = datastore + .database_nexus_access(nexus2_id) + .await + .expect("Failed to get nexus2 access"); + assert!(nexus1_before.is_some(), "nexus1 should have access record"); + assert!(nexus2_before.is_some(), "nexus2 should have access record"); + + // Delete nexus1 record + datastore + .database_nexus_access_delete(&opctx, nexus1_id) + .await + .expect("Failed to delete nexus1 access"); + + // Verify nexus1 record was deleted, nexus2 record remains + let nexus1_after = datastore + .database_nexus_access(nexus1_id) + .await + .expect("Failed to get nexus1 access after delete"); + let nexus2_after = datastore + .database_nexus_access(nexus2_id) + .await + .expect("Failed to get nexus2 access after delete"); + assert!( + nexus1_after.is_none(), + "nexus1 should not have access record after delete" + ); + assert!( + nexus2_after.is_some(), + "nexus2 should still have access record" + ); + + // Delete nexus2 record + datastore + .database_nexus_access_delete(&opctx, nexus2_id) + .await + .expect("Failed to delete nexus2 access"); + + // Verify nexus2 record was also deleted + let nexus2_final = datastore + .database_nexus_access(nexus2_id) + .await + .expect("Failed to get nexus2 access after final delete"); + assert!( + nexus2_final.is_none(), + "nexus2 should not have access record after delete" + ); + + // Confirm deletion is idempotent + datastore + .database_nexus_access_delete(&opctx, nexus1_id) + .await + .expect("Failed to delete nexus1 access idempotently"); + + // This also means deleting non-existent 
records should be fine
+        datastore
+            .database_nexus_access_delete(&opctx, OmicronZoneUuid::new_v4())
+            .await
+            .expect("Failed to delete non-existent record");
+
+        db.terminate().await;
+        logctx.cleanup_successful();
+    }
 }
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 317680cbf7f..124962090b7 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -39,8 +39,9 @@ use omicron_common::api::external::ResourceType;
 use omicron_common::backoff::{
     BackoffError, retry_notify, retry_policy_internal_service,
 };
-use omicron_uuid_kinds::{GenericUuid, SledUuid};
+use omicron_uuid_kinds::{GenericUuid, OmicronZoneUuid, SledUuid};
 use slog::Logger;
+use slog_error_chain::InlineErrorChain;
 use std::net::Ipv6Addr;
 use std::num::NonZeroU32;
 use std::sync::Arc;
@@ -121,6 +122,8 @@ pub mod webhook_delivery;
 mod zpool;
 
 pub use address_lot::AddressLotCreateResult;
+pub use db_metadata::DatastoreSetupAction;
+pub use db_metadata::ValidatedDatastoreSetupAction;
 pub use dns::DataStoreDnsTest;
 pub use dns::DnsVersionUpdateBuilder;
 pub use ereport::EreportFilters;
@@ -189,6 +192,21 @@ impl<U, T> RunnableQuery<U> for T where
 {
 }
 
+/// Specifies whether the consumer wants to check whether they're allowed to
+/// access the database based on the `db_metadata_nexus` table.
+#[derive(Debug, Clone, Copy)]
+pub enum IdentityCheckPolicy {
+    /// The consumer wants full access to the database regardless of the
+    /// current upgrade / handoff state. This would be used by almost all
+    /// tools and tests.
+    DontCare,
+
+    /// The consumer only wants to access the database if it's in the current
+    /// set of Nexus instances that's supposed to be able to access it. If
+    /// possible and legal, take over access from the existing set.
+    CheckAndTakeover { nexus_id: OmicronZoneUuid },
+}
+
 pub struct DataStore {
     log: Logger,
     pool: Arc<Pool>,
@@ -224,8 +242,9 @@ impl DataStore {
         log: &Logger,
         pool: Arc<Pool>,
         config: Option<&AllSchemaVersions>,
+        identity_check: IdentityCheckPolicy,
     ) -> Result<Self, String> {
-        Self::new_with_timeout(log, pool, config, None).await
+        Self::new_with_timeout(log, pool, config, None, identity_check).await
     }
 
     pub async fn new_with_timeout(
@@ -233,7 +252,9 @@ impl DataStore {
         pool: Arc<Pool>,
         config: Option<&AllSchemaVersions>,
         try_for: Option<std::time::Duration>,
+        identity_check: IdentityCheckPolicy,
     ) -> Result<Self, String> {
+        use db_metadata::DatastoreSetupAction;
         use nexus_db_model::SCHEMA_VERSION as EXPECTED_VERSION;
 
         let datastore =
@@ -247,25 +268,89 @@ impl DataStore {
             || async {
                 if let Some(try_for) = try_for {
                     if std::time::Instant::now() > start + try_for {
-                        return Err(BackoffError::permanent(()));
+                        return Err(BackoffError::permanent(
+                            "Timeout waiting for DataStore::new_with_timeout",
+                        ));
                     }
                 }
 
-                match datastore
-                    .ensure_schema(&log, EXPECTED_VERSION, config)
-                    .await
-                {
-                    Ok(()) => return Ok(()),
-                    Err(e) => {
-                        warn!(log, "Failed to ensure schema version"; "error" => #%e);
+                loop {
+                    let checked_action = datastore
+                        .check_schema_and_access(
+                            identity_check,
+                            EXPECTED_VERSION,
+                        )
+                        .await
+                        .map_err(|err| {
+                            warn!(
+                                log,
+                                "Cannot check schema version / Nexus access";
+                                "error" => InlineErrorChain::new(err.as_ref()),
+                            );
+                            BackoffError::transient(
+                                "Cannot check schema version / Nexus access",
+                            )
+                        })?;
+
+                    match checked_action.action() {
+                        DatastoreSetupAction::Ready => {
+                            info!(log, "Datastore is ready for usage");
+                            return Ok(());
+                        }
+                        DatastoreSetupAction::NeedsHandoff { nexus_id } => {
+                            info!(log, "Datastore is awaiting handoff");
+
+                            datastore.attempt_handoff(*nexus_id).await.map_err(
+                                |err| {
+                                    warn!(
+                                        log,
+                                        "Could not handoff to new nexus";
+                                        err
+                                    );
+                                    BackoffError::transient(
+                                        "Could not handoff to new nexus",
+                                    )
+                                },
+                            )?;
+
+                            // If the handoff was successful, immediately
+                            // re-evaluate the schema and access policies to see
+                            // if we should update or not.
+                            continue;
+                        }
+                        DatastoreSetupAction::Update => {
+                            info!(
+                                log,
+                                "Datastore should be updated before usage"
+                            );
+                            datastore
+                                .update_schema(checked_action, config)
+                                .await
+                                .map_err(|err| {
+                                    warn!(
+                                        log,
+                                        "Failed to update schema version";
+                                        "error" => InlineErrorChain::new(err.as_ref())
+                                    );
+                                    BackoffError::transient(
+                                        "Failed to update schema version",
+                                    )
+                                })?;
+                            return Ok(());
+                        }
+                        DatastoreSetupAction::Refuse => {
+                            error!(log, "Datastore should not be used");
+                            return Err(BackoffError::permanent(
+                                "Datastore should not be used",
+                            ));
+                        }
                     }
-                };
-                return Err(BackoffError::transient(()));
+                }
             },
             |_, _| {},
         )
         .await
-        .map_err(|_| "Failed to read valid DB schema".to_string())?;
+        .map_err(|err| err.to_string())?;
 
         Ok(datastore)
     }
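The setup loop added above reduces to a small state machine over `DatastoreSetupAction`. A condensed model of the control flow, for orientation only (the `LoopStep` enum and `plan` helper are illustrative and not part of this change):

```rust
// Illustrative only: each iteration of the setup loop computes a
// `DatastoreSetupAction` and then either finishes, re-evaluates
// immediately, retries under backoff, or fails permanently.
use nexus_db_queries::db::datastore::DatastoreSetupAction;

enum LoopStep {
    /// `Ready`, or `Update` once `update_schema` has succeeded.
    Done,
    /// `NeedsHandoff` and `attempt_handoff` succeeded: re-check immediately.
    ReevaluateNow,
    /// A transient failure; the backoff policy schedules another attempt.
    RetryLater,
    /// `Refuse`: this Nexus must not use the datastore.
    Fail,
}

fn plan(action: &DatastoreSetupAction, step_succeeded: bool) -> LoopStep {
    match action {
        DatastoreSetupAction::Ready => LoopStep::Done,
        DatastoreSetupAction::NeedsHandoff { .. } if step_succeeded => {
            LoopStep::ReevaluateNow
        }
        DatastoreSetupAction::NeedsHandoff { .. } => LoopStep::RetryLater,
        DatastoreSetupAction::Update if step_succeeded => LoopStep::Done,
        DatastoreSetupAction::Update => LoopStep::RetryLater,
        DatastoreSetupAction::Refuse => LoopStep::Fail,
    }
}
```

`NeedsHandoff` is the only arm that loops without consulting the backoff policy: a successful handoff should immediately re-run `check_schema_and_access`, which is what the `continue` in the real loop does.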
diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs
index 70cf81c1bb6..68d8ca68359 100644
--- a/nexus/db-queries/src/db/datastore/rack.rs
+++ b/nexus/db-queries/src/db/datastore/rack.rs
@@ -728,6 +728,7 @@ impl DataStore {
         //   - Zpools
         //   - Datasets
         //   - A blueprint
+        //   - Nexus database access records
         //
         // Which RSS has already allocated during bootstrapping.
@@ -794,6 +795,22 @@ impl DataStore {
                     DieselError::RollbackTransaction
                 })?;
 
+                // Insert Nexus database access records
+                self.initialize_nexus_access_from_blueprint_on_connection(
+                    &conn,
+                    blueprint.all_omicron_zones(BlueprintZoneDisposition::is_in_service)
+                        .filter_map(|(_sled, zone_cfg)| {
+                            if zone_cfg.zone_type.is_nexus() {
+                                Some(zone_cfg.id)
+                            } else {
+                                None
+                            }
+                        }).collect(),
+                ).await.map_err(|e| {
+                    err.set(RackInitError::BlueprintTargetSet(e)).unwrap();
+                    DieselError::RollbackTransaction
+                })?;
+
                 // Allocate networking records for all services.
                 for (_, zone_config) in
                     blueprint.all_omicron_zones(BlueprintZoneDisposition::is_in_service)
                 {
                     self.rack_populate_service_networking_records(
diff --git a/nexus/db-queries/src/db/pub_test_utils/mod.rs b/nexus/db-queries/src/db/pub_test_utils/mod.rs
index db2acabd06a..420f96ee6ab 100644
--- a/nexus/db-queries/src/db/pub_test_utils/mod.rs
+++ b/nexus/db-queries/src/db/pub_test_utils/mod.rs
@@ -12,6 +12,7 @@ use crate::authz;
 use crate::context::OpContext;
 use crate::db;
 use crate::db::DataStore;
+use crate::db::datastore::IdentityCheckPolicy;
 use omicron_test_utils::dev::db::CockroachInstance;
 use slog::Logger;
 use std::sync::Arc;
@@ -114,7 +115,14 @@ impl TestDatabaseBuilder {
             Interface::Datastore => {
                 let pool = new_pool(log, &db);
                 let datastore = Arc::new(
-                    DataStore::new(&log, pool, None).await.unwrap(),
+                    DataStore::new(
+                        &log,
+                        pool,
+                        None,
+                        IdentityCheckPolicy::DontCare,
+                    )
+                    .await
+                    .unwrap(),
                 );
                 TestDatabase {
                     db,
@@ -300,7 +308,11 @@ async fn datastore_test(
     let cfg = db::Config { url: db.pg_config().clone() };
     let pool = Arc::new(db::Pool::new_single_host(&log, &cfg));
-    let datastore = Arc::new(DataStore::new(&log, pool, None).await.unwrap());
+    let datastore = Arc::new(
+        DataStore::new(&log, pool, None, IdentityCheckPolicy::DontCare)
+            .await
+            .unwrap(),
+    );
 
     // Create an OpContext with the credentials of "db-init" just for the
     // purpose of loading the built-in users, roles, and assignments.
diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs
index 1766054c9ad..bac6d5ae3b4 100644
--- a/nexus/db-schema/src/enums.rs
+++ b/nexus/db-schema/src/enums.rs
@@ -37,6 +37,7 @@ define_enums! {
     CabooseWhichEnum => "caboose_which",
     ClickhouseModeEnum => "clickhouse_mode",
     DatasetKindEnum => "dataset_kind",
+    DbMetadataNexusStateEnum => "db_metadata_nexus_state",
     DnsGroupEnum => "dns_group",
     DownstairsClientStopRequestReasonEnum => "downstairs_client_stop_request_reason_type",
     DownstairsClientStoppedReasonEnum => "downstairs_client_stopped_reason_type",
diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs
index 7d4daff9d2d..f38797743c7 100644
--- a/nexus/db-schema/src/schema.rs
+++ b/nexus/db-schema/src/schema.rs
@@ -2371,6 +2371,14 @@ table! {
     }
 }
 
+table! {
+    db_metadata_nexus (nexus_id) {
+        nexus_id -> Uuid,
+        last_drained_blueprint_id -> Nullable<Uuid>,
+        state -> crate::enums::DbMetadataNexusStateEnum,
+    }
+}
+
 table! {
     migration (id) {
         id -> Uuid,
diff --git a/nexus/reconfigurator/execution/src/database.rs b/nexus/reconfigurator/execution/src/database.rs
new file mode 100644
index 00000000000..9652e53ec1a
--- /dev/null
+++ b/nexus/reconfigurator/execution/src/database.rs
@@ -0,0 +1,24 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Manages deployment of records into the database.
+ +use anyhow::anyhow; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::Blueprint; + +/// Idempotently ensure that the Nexus records for the zones are populated +/// in the database. +pub(crate) async fn deploy_db_metadata_nexus_records( + opctx: &OpContext, + datastore: &DataStore, + blueprint: &Blueprint, +) -> Result<(), anyhow::Error> { + datastore + .database_nexus_access_create(opctx, blueprint) + .await + .map_err(|err| anyhow!(err))?; + Ok(()) +} diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 43c09485557..5660fcfc06f 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -32,8 +32,10 @@ use tokio::sync::watch; use update_engine::StepSuccess; use update_engine::StepWarning; use update_engine::merge_anyhow_list; + mod clickhouse; mod cockroachdb; +mod database; mod dns; mod omicron_physical_disks; mod omicron_sled_config; @@ -196,6 +198,13 @@ pub async fn realize_blueprint( ) .into_shared(); + register_deploy_db_metadata_nexus_records_step( + &engine.for_component(ExecutionComponent::SledList), + &opctx, + datastore, + blueprint, + ); + register_deploy_sled_configs_step( &engine.for_component(ExecutionComponent::SledAgent), &opctx, @@ -390,6 +399,28 @@ fn register_sled_list_step<'a>( .register() } +fn register_deploy_db_metadata_nexus_records_step<'a>( + registrar: &ComponentRegistrar<'_, 'a>, + opctx: &'a OpContext, + datastore: &'a DataStore, + blueprint: &'a Blueprint, +) { + registrar + .new_step( + ExecutionStepId::Ensure, + "Ensure db_metadata_nexus_state records exist", + async move |_cx| { + database::deploy_db_metadata_nexus_records( + opctx, &datastore, &blueprint, + ) + .await + .context("ensuring db_metadata_nexus_state")?; + StepSuccess::new(()).into() + }, + ) + .register(); +} + fn register_deploy_sled_configs_step<'a>( registrar: &ComponentRegistrar<'_, 'a>, opctx: &'a OpContext, diff --git a/nexus/reconfigurator/execution/src/omicron_zones.rs b/nexus/reconfigurator/execution/src/omicron_zones.rs index 28c981fd90e..74e4625358d 100644 --- a/nexus/reconfigurator/execution/src/omicron_zones.rs +++ b/nexus/reconfigurator/execution/src/omicron_zones.rs @@ -72,10 +72,13 @@ async fn clean_up_expunged_zones_impl( )); let result = match &config.zone_type { - // Zones which need no cleanup work after expungement. - BlueprintZoneType::Nexus(_) => None, - // Zones which need cleanup after expungement. 
+ BlueprintZoneType::Nexus(_) => Some( + datastore + .database_nexus_access_delete(&opctx, config.id) + .await + .map_err(|err| anyhow::anyhow!(err)), + ), BlueprintZoneType::CockroachDb(_) => { if decommission_cockroach { Some( diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index cb3ca045cf9..4a037cb0968 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -24,6 +24,7 @@ use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; +use nexus_db_queries::db::datastore::IdentityCheckPolicy; use nexus_mgs_updates::ArtifactCache; use nexus_mgs_updates::MgsUpdateDriver; use nexus_types::deployment::PendingMgsUpdates; @@ -310,12 +311,14 @@ impl Nexus { .map(|s| AllSchemaVersions::load(&s.schema_dir)) .transpose() .map_err(|error| format!("{error:#}"))?; + let nexus_id = config.deployment.id; let db_datastore = Arc::new( db::DataStore::new_with_timeout( &log, Arc::clone(&pool), all_versions.as_ref(), config.pkg.tunables.load_timeout, + IdentityCheckPolicy::CheckAndTakeover { nexus_id }, ) .await?, ); diff --git a/nexus/src/bin/schema-updater.rs b/nexus/src/bin/schema-updater.rs index ead8554f734..9d5869180d6 100644 --- a/nexus/src/bin/schema-updater.rs +++ b/nexus/src/bin/schema-updater.rs @@ -14,6 +14,8 @@ use nexus_db_model::AllSchemaVersions; use nexus_db_model::SCHEMA_VERSION; use nexus_db_queries::db; use nexus_db_queries::db::DataStore; +use nexus_db_queries::db::datastore::DatastoreSetupAction; +use nexus_db_queries::db::datastore::IdentityCheckPolicy; use semver::Version; use slog::Drain; use slog::Level; @@ -108,11 +110,39 @@ async fn main_impl() -> anyhow::Result<()> { } Cmd::Upgrade { version } => { println!("Upgrading to {version}"); - datastore - .ensure_schema(&log, version.clone(), Some(&all_versions)) - .await - .map_err(|e| anyhow!(e))?; - println!("Upgrade to {version} complete"); + let checked_action = datastore + .check_schema_and_access( + IdentityCheckPolicy::DontCare, + version.clone(), + ) + .await?; + + match checked_action.action() { + DatastoreSetupAction::Ready => { + println!("Already at version {version}") + } + DatastoreSetupAction::Update => { + datastore + .update_schema(checked_action, Some(&all_versions)) + .await + .map_err(|e| anyhow!(e))?; + println!("Update to {version} complete"); + } + DatastoreSetupAction::Refuse => { + println!("Refusing to update to version {version}") + } + DatastoreSetupAction::NeedsHandoff { .. } => { + // This case should not happen - we supplied + // IdentityCheckPolicy::DontCare, so we should not be told + // to attempt a takeover by a specific Nexus. + println!( + "Refusing to update to version {version}. \ + The schema updater tried to ignore the identity check, \ + but got a response indicating handoff is needed. 
\
+                    This is unexpected, and probably a bug"
+                )
+            }
+        }
         }
     }
     datastore.terminate().await;
diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs
index b77245d5df4..93632d77c10 100644
--- a/nexus/src/populate.rs
+++ b/nexus/src/populate.rs
@@ -339,6 +339,7 @@ mod test {
     use nexus_db_queries::authz;
     use nexus_db_queries::context::OpContext;
     use nexus_db_queries::db;
+    use nexus_db_queries::db::datastore::IdentityCheckPolicy;
     use nexus_db_queries::db::pub_test_utils::TestDatabase;
     use omicron_common::api::external::Error;
     use omicron_test_utils::dev;
@@ -364,7 +365,14 @@ mod test {
         let cfg = db::Config { url: db.crdb().pg_config().clone() };
         let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg));
         let datastore = Arc::new(
-            db::DataStore::new(&logctx.log, pool, None).await.unwrap(),
+            db::DataStore::new(
+                &logctx.log,
+                pool,
+                None,
+                IdentityCheckPolicy::DontCare,
+            )
+            .await
+            .unwrap(),
         );
         let opctx = OpContext::for_background(
             logctx.log.clone(),
@@ -415,7 +423,14 @@ mod test {
         // We need to create the datastore before tearing down the database, as
         // it verifies the schema version of the DB while booting.
         let datastore = Arc::new(
-            db::DataStore::new(&logctx.log, pool, None).await.unwrap(),
+            db::DataStore::new(
+                &logctx.log,
+                pool,
+                None,
+                IdentityCheckPolicy::DontCare,
+            )
+            .await
+            .unwrap(),
         );
         let opctx = OpContext::for_background(
             logctx.log.clone(),
diff --git a/nexus/tests/integration_tests/initialization.rs b/nexus/tests/integration_tests/initialization.rs
index fa4511d6de1..32c030534f2 100644
--- a/nexus/tests/integration_tests/initialization.rs
+++ b/nexus/tests/integration_tests/initialization.rs
@@ -261,7 +261,7 @@ async fn test_nexus_does_not_boot_without_valid_schema() {
         .expect_err("Nexus should have failed to start");
 
     assert!(
-        err.contains("Failed to read valid DB schema"),
+        err.contains("Datastore should not be used"),
         "Saw error: {err}"
     );
 
diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs
index 9111e6a0949..0ba98bdf0b6 100644
--- a/nexus/tests/integration_tests/schema.rs
+++ b/nexus/tests/integration_tests/schema.rs
@@ -2889,6 +2889,157 @@ fn after_171_0_0<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> {
     })
 }
 
+const NEXUS_ID_184_0: &str = "387433f9-1473-4ca2-b156-9670452985e0";
+const EXPUNGED_NEXUS_ID_184_0: &str = "287433f9-1473-4ca2-b156-9670452985e0";
+const OLD_NEXUS_ID_184_0: &str = "187433f9-1473-4ca2-b156-9670452985e0";
+
+const BP_ID_184_0: &str = "5a5ff941-3b5a-403b-9fda-db2049f4c736";
+const OLD_BP_ID_184_0: &str = "4a5ff941-3b5a-403b-9fda-db2049f4c736";
+
+fn before_184_0_0<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> {
+    Box::pin(async move {
+        // Create a blueprint which contains a Nexus - we'll use this for the
+        // migration.
+        //
+        // It also contains an expunged Nexus, which should be ignored.
+        ctx.client
+            .execute(
+                &format!(
+                    "INSERT INTO omicron.public.bp_target
+                        (version, blueprint_id, enabled, time_made_target)
+                    VALUES
+                        (1, '{BP_ID_184_0}', true, now());",
+                ),
+                &[],
+            )
+            .await
+            .expect("inserted bp_target rows for 184");
+        ctx.client
+            .execute(
+                &format!(
+                    "INSERT INTO omicron.public.bp_omicron_zone (
+                        blueprint_id, sled_id, id, zone_type,
+                        primary_service_ip, primary_service_port,
+                        second_service_ip, second_service_port,
+                        dataset_zpool_name, bp_nic_id,
+                        dns_gz_address, dns_gz_address_index,
+                        ntp_ntp_servers, ntp_dns_servers, ntp_domain,
+                        nexus_external_tls, nexus_external_dns_servers,
+                        snat_ip, snat_first_port, snat_last_port,
+                        external_ip_id, filesystem_pool, disposition,
+                        disposition_expunged_as_of_generation,
+                        disposition_expunged_ready_for_cleanup,
+                        image_source, image_artifact_sha256
+                    )
+                    VALUES (
+                        '{BP_ID_184_0}', gen_random_uuid(), '{NEXUS_ID_184_0}',
+                        'nexus', '192.168.1.10', 8080, NULL, NULL, NULL, NULL,
+                        NULL, NULL, NULL, NULL, NULL, false, ARRAY[]::INET[],
+                        NULL, NULL, NULL, NULL, gen_random_uuid(),
+                        'in_service', NULL, false, 'install_dataset', NULL
+                    ),
+                    (
+                        '{BP_ID_184_0}', gen_random_uuid(),
+                        '{EXPUNGED_NEXUS_ID_184_0}', 'nexus', '192.168.1.11',
+                        8080, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                        NULL, false, ARRAY[]::INET[], NULL, NULL, NULL, NULL,
+                        gen_random_uuid(), 'expunged', 1, false,
+                        'install_dataset', NULL
+                    );"
+                ),
+                &[],
+            )
+            .await
+            .expect("inserted bp_omicron_zone rows for 184");
+
+        // ALSO create an old blueprint, which isn't the latest target.
+        //
+        // We should ignore this one! No rows should be inserted for old data.
+        ctx.client
+            .execute(
+                &format!(
+                    "INSERT INTO omicron.public.bp_target
+                        (version, blueprint_id, enabled, time_made_target)
+                    VALUES
+                        (0, '{OLD_BP_ID_184_0}', true, now());",
+                ),
+                &[],
+            )
+            .await
+            .expect("inserted bp_target rows for 184");
+        ctx.client
+            .execute(
+                &format!(
+                    "INSERT INTO omicron.public.bp_omicron_zone (
+                        blueprint_id, sled_id, id, zone_type,
+                        primary_service_ip, primary_service_port,
+                        second_service_ip, second_service_port,
+                        dataset_zpool_name, bp_nic_id,
+                        dns_gz_address, dns_gz_address_index,
+                        ntp_ntp_servers, ntp_dns_servers, ntp_domain,
+                        nexus_external_tls, nexus_external_dns_servers,
+                        snat_ip, snat_first_port, snat_last_port,
+                        external_ip_id, filesystem_pool, disposition,
+                        disposition_expunged_as_of_generation,
+                        disposition_expunged_ready_for_cleanup,
+                        image_source, image_artifact_sha256
+                    )
+                    VALUES (
+                        '{OLD_BP_ID_184_0}', gen_random_uuid(),
+                        '{OLD_NEXUS_ID_184_0}', 'nexus', '192.168.1.10', 8080,
+                        NULL, NULL, NULL, NULL,
+                        NULL, NULL, NULL, NULL, NULL,
+                        false, ARRAY[]::INET[], NULL, NULL, NULL,
+                        NULL, gen_random_uuid(), 'in_service',
+                        NULL, false, 'install_dataset', NULL
+                    );"
+                ),
+                &[],
+            )
+            .await
+            .expect("inserted bp_omicron_zone rows for 184");
+    })
+}
+
+fn after_184_0_0<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> {
+    Box::pin(async move {
+        // After the migration, the new row should be created - only for
+        // Nexuses in the latest blueprint.
+        //
+        // Note that "OLD_NEXUS_ID_184_0" doesn't get a row - it's in an old
+        // blueprint.
+        let rows = ctx
+            .client
+            .query(
+                "SELECT
+                    nexus_id,
+                    last_drained_blueprint_id,
+                    state
+                FROM omicron.public.db_metadata_nexus;",
+                &[],
+            )
+            .await
+            .expect("queried post-migration db_metadata_nexus");
+
+        let rows = process_rows(&rows);
+        assert_eq!(rows.len(), 1);
+        let row = &rows[0];
+
+        // Only the Nexus in the target blueprint gets a row, and it should
+        // be marked "active".
+        assert_eq!(
+            row.values[0].expect("nexus_id").unwrap(),
+            &AnySqlType::Uuid(NEXUS_ID_184_0.parse().unwrap())
+        );
+        assert_eq!(row.values[1].expect("last_drained_blueprint_id"), None);
+        assert_eq!(
+            row.values[2].expect("state").unwrap(),
+            &AnySqlType::Enum(SqlEnum::from((
+                "db_metadata_nexus_state",
+                "active"
+            )))
+        );
+    })
+}
+
 // Lazily initializes all migration checks. The combination of Rust function
 // pointers and async makes defining a static table fairly painful, so we're
 // using lazy initialization instead.
@@ -2987,7 +3138,10 @@ fn get_migration_checks() -> BTreeMap<Version, DataMigrationFns> {
         Version::new(171, 0, 0),
         DataMigrationFns::new().before(before_171_0_0).after(after_171_0_0),
     );
-
+    map.insert(
+        Version::new(184, 0, 0),
+        DataMigrationFns::new().before(before_184_0_0).after(after_184_0_0),
+    );
     map
 }
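Beyond the backfill test above, the unique `(state, nexus_id)` index created in the `dbinit.sql` changes below exists so that handoff can cheaply answer "are any Nexus instances still `active`?", the condition behind `HandoffError::ActiveNexusInstancesExist`. A minimal sketch of that kind of query, assuming Diesel's generated `dsl` module; the helper name and connection type here are illustrative, not the datastore's actual code:

```rust
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::prelude::*;
use nexus_db_model::DbMetadataNexusState;
use nexus_db_schema::schema::db_metadata_nexus::dsl;

/// Illustrative helper: how many Nexus instances are still marked `active`?
/// The connection type is a stand-in for the datastore's pooled connection.
async fn count_active_nexuses(
    conn: &async_bb8_diesel::Connection<diesel::PgConnection>,
) -> Result<i64, diesel::result::Error> {
    dsl::db_metadata_nexus
        .filter(dsl::state.eq(DbMetadataNexusState::Active))
        .count()
        .get_result_async(conn)
        .await
}
```

Because the index leads with `state`, this is an index-only lookup rather than a full-table scan, which matters for a check that runs on every Nexus boot.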
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index 4ef07d9dbdf..a061adf5d18 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -5628,29 +5628,6 @@ CREATE INDEX IF NOT EXISTS lookup_region_snapshot_replacement_step_by_state
 CREATE INDEX IF NOT EXISTS lookup_region_snapshot_replacement_step_by_old_volume_id
     on omicron.public.region_snapshot_replacement_step (old_snapshot_volume_id);
 
-/*
- * Metadata for the schema itself. This version number isn't great, as there's
- * nothing to ensure it gets bumped when it should be, but it's a start.
- */
-CREATE TABLE IF NOT EXISTS omicron.public.db_metadata (
-    -- There should only be one row of this table for the whole DB.
-    -- It's a little goofy, but filter on "singleton = true" before querying
-    -- or applying updates, and you'll access the singleton row.
-    --
-    -- We also add a constraint on this table to ensure it's not possible to
-    -- access the version of this table with "singleton = false".
-    singleton BOOL NOT NULL PRIMARY KEY,
-    time_created TIMESTAMPTZ NOT NULL,
-    time_modified TIMESTAMPTZ NOT NULL,
-    -- Semver representation of the DB version
-    version STRING(64) NOT NULL,
-
-    -- (Optional) Semver representation of the DB version to which we're upgrading
-    target_version STRING(64),
-
-    CHECK (singleton = true)
-);
-
 -- An allowlist of IP addresses that can make requests to user-facing services.
 CREATE TABLE IF NOT EXISTS omicron.public.allow_list (
     id UUID PRIMARY KEY,
@@ -6551,10 +6528,59 @@ ON omicron.public.host_ereport (
 ) WHERE time_deleted IS NULL;
 
-/*
- * Keep this at the end of file so that the database does not contain a version
- * until it is fully populated.
- */
+-- Metadata for the schema itself.
+--
+-- This table may be read by Nexuses with different notions of "what the schema
+-- should be". Unlike other tables in the database, caution should be taken
+-- when changing this table's schema.
+CREATE TABLE IF NOT EXISTS omicron.public.db_metadata (
+    -- There should only be one row of this table for the whole DB.
+    -- It's a little goofy, but filter on "singleton = true" before querying
+    -- or applying updates, and you'll access the singleton row.
+    --
+    -- We also add a constraint on this table to ensure it's not possible to
+    -- access the version of this table with "singleton = false".
+    singleton BOOL NOT NULL PRIMARY KEY,
+    time_created TIMESTAMPTZ NOT NULL,
+    time_modified TIMESTAMPTZ NOT NULL,
+    -- Semver representation of the DB version
+    version STRING(64) NOT NULL,
+
+    -- (Optional) Semver representation of the DB version to which we're upgrading
+    target_version STRING(64),
+
+    CHECK (singleton = true)
+);
+
+CREATE TYPE IF NOT EXISTS omicron.public.db_metadata_nexus_state AS ENUM (
+    -- This Nexus is allowed to access this database
+    'active',
+
+    -- This Nexus is not yet allowed to access the database
+    'not_yet',
+
+    -- This Nexus has committed to no longer accessing this database
+    'quiesced'
+);
+
+-- Nexuses which may be attempting to access the database, and a state
+-- which identifies if they should be allowed to do so.
+--
+-- This table is used during upgrade to implement handoff between old and new
+-- Nexus zones. It is read by all Nexuses during initialization to identify
+-- whether they should have access to the database.
+CREATE TABLE IF NOT EXISTS omicron.public.db_metadata_nexus (
+    nexus_id UUID NOT NULL PRIMARY KEY,
+    last_drained_blueprint_id UUID,
+    state omicron.public.db_metadata_nexus_state NOT NULL
+);
+
+CREATE UNIQUE INDEX IF NOT EXISTS lookup_db_metadata_nexus_by_state on omicron.public.db_metadata_nexus (
+    state,
+    nexus_id
+);
+
+-- Keep this at the end of file so that the database does not contain a version
+-- until it is fully populated.
 INSERT INTO omicron.public.db_metadata (
     singleton,
     time_created,
@@ -6562,7 +6588,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    (TRUE, NOW(), NOW(), '183.0.0', NULL)
+    (TRUE, NOW(), NOW(), '184.0.0', NULL)
 ON CONFLICT DO NOTHING;
 
 COMMIT;
diff --git a/schema/crdb/populate-db-metadata-nexus/up01.sql b/schema/crdb/populate-db-metadata-nexus/up01.sql
new file mode 100644
index 00000000000..25c42761e04
--- /dev/null
+++ b/schema/crdb/populate-db-metadata-nexus/up01.sql
@@ -0,0 +1,6 @@
+CREATE TYPE IF NOT EXISTS omicron.public.db_metadata_nexus_state AS ENUM (
+    'active',
+    'not_yet',
+    'quiesced'
+);
+
diff --git a/schema/crdb/populate-db-metadata-nexus/up02.sql b/schema/crdb/populate-db-metadata-nexus/up02.sql
new file mode 100644
index 00000000000..9fac217eec4
--- /dev/null
+++ b/schema/crdb/populate-db-metadata-nexus/up02.sql
@@ -0,0 +1,11 @@
+-- Nexuses which may be attempting to access the database, and a state
+-- which identifies if they should be allowed to do so.
+--
+-- This table is used during upgrade to implement handoff between old and new
+-- Nexus zones.
+CREATE TABLE IF NOT EXISTS omicron.public.db_metadata_nexus ( + nexus_id UUID NOT NULL PRIMARY KEY, + last_drained_blueprint_id UUID, + state omicron.public.db_metadata_nexus_state NOT NULL +); + diff --git a/schema/crdb/populate-db-metadata-nexus/up03.sql b/schema/crdb/populate-db-metadata-nexus/up03.sql new file mode 100644 index 00000000000..42fbf004137 --- /dev/null +++ b/schema/crdb/populate-db-metadata-nexus/up03.sql @@ -0,0 +1,4 @@ +CREATE UNIQUE INDEX IF NOT EXISTS lookup_db_metadata_nexus_by_state on omicron.public.db_metadata_nexus ( + state, + nexus_id +); diff --git a/schema/crdb/populate-db-metadata-nexus/up04.sql b/schema/crdb/populate-db-metadata-nexus/up04.sql new file mode 100644 index 00000000000..36b876b9cdd --- /dev/null +++ b/schema/crdb/populate-db-metadata-nexus/up04.sql @@ -0,0 +1,16 @@ +-- Populate db_metadata_nexus records for all Nexus zones in the current target blueprint +-- +-- This migration handles backfill for existing deployments that are upgrading +-- to include db_metadata_nexus. It finds all Nexus zones in the current +-- target blueprint and marks them as 'active' in the db_metadata_nexus table. + +SET LOCAL disallow_full_table_scans = off; + +INSERT INTO omicron.public.db_metadata_nexus (nexus_id, last_drained_blueprint_id, state) +SELECT bz.id, NULL, 'active' +FROM omicron.public.bp_target bt +JOIN omicron.public.bp_omicron_zone bz ON bt.blueprint_id = bz.blueprint_id +WHERE bz.zone_type = 'nexus' + AND bz.disposition != 'expunged' + AND bt.version = (SELECT MAX(version) FROM omicron.public.bp_target) +ON CONFLICT (nexus_id) DO NOTHING;
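The `ON CONFLICT (nexus_id) DO NOTHING` clause in `up04.sql` is the same idempotency strategy the runtime path needs: re-running the insert must not clobber a state (such as `quiesced`) that was recorded in the meantime, which is exactly what `test_database_nexus_access_create` asserts. A hedged Diesel sketch of an insert with those semantics (the helper name and connection type are illustrative, not necessarily how `database_nexus_access_create` is implemented):

```rust
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::prelude::*;
use nexus_db_model::{DbMetadataNexus, DbMetadataNexusState};
use nexus_db_schema::schema::db_metadata_nexus::dsl;
use omicron_uuid_kinds::OmicronZoneUuid;

/// Illustrative helper: insert an "active" record for `nexus_id`, leaving any
/// existing row (e.g. one already marked "quiesced") untouched.
async fn insert_active_record(
    // Stand-in for the datastore's pooled connection type.
    conn: &async_bb8_diesel::Connection<diesel::PgConnection>,
    nexus_id: OmicronZoneUuid,
) -> Result<(), diesel::result::Error> {
    diesel::insert_into(dsl::db_metadata_nexus)
        .values(DbMetadataNexus::new(nexus_id, DbMetadataNexusState::Active))
        // Mirrors "ON CONFLICT (nexus_id) DO NOTHING" in up04.sql.
        .on_conflict(dsl::nexus_id)
        .do_nothing()
        .execute_async(conn)
        .await
        .map(|_| ())
}
```

Because the conflict target is the primary key, a re-executed backfill or blueprint-execution step is a no-op for rows that already exist.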