Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 61 additions & 10 deletions dev-tools/omdb/src/bin/omdb/nexus/quiesce.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ use chrono::Utc;
use clap::Args;
use clap::Subcommand;
use nexus_client::types::QuiesceState;
use nexus_client::types::QuiesceStatus;
use nexus_client::types::SagaQuiesceStatus;
use std::time::Duration;

#[derive(Debug, Args)]
Expand All @@ -31,9 +33,9 @@ pub enum QuiesceCommands {

#[derive(Debug, Args)]
pub struct QuiesceShowArgs {
/// Show details about held database connections
/// Show stack traces for held database connections
#[clap(short, long, default_value_t = false)]
verbose: bool,
stacks: bool,
}

pub async fn cmd_nexus_quiesce(
Expand All @@ -60,7 +62,10 @@ async fn quiesce_show(
.await
.context("fetching quiesce state")?
.into_inner();
match quiesce.state {

let QuiesceStatus { db_claims, sagas, state } = quiesce;

match state {
QuiesceState::Undetermined => {
println!("has not yet determined if it is quiescing");
}
Expand Down Expand Up @@ -145,25 +150,71 @@ async fn quiesce_show(
}
}

println!("sagas running: {}", quiesce.sagas_pending.len());
for saga in &quiesce.sagas_pending {
let SagaQuiesceStatus {
sagas_pending,
drained_blueprint_id,
first_recovery_complete,
new_sagas_allowed,
reassignment_blueprint_id,
reassignment_generation,
reassignment_pending,
recovered_blueprint_id,
recovered_reassignment_generation,
} = sagas;

println!("saga quiesce:");
println!(" new sagas: {:?}", new_sagas_allowed);
println!(
" drained as of blueprint: {}",
drained_blueprint_id
.map(|s| s.to_string())
.as_deref()
.unwrap_or("none")
);
println!(
" blueprint for last recovery pass: {}",
recovered_blueprint_id
.map(|s| s.to_string())
.as_deref()
.unwrap_or("none")
);
println!(
" blueprint for last reassignment pass: {}",
reassignment_blueprint_id
.map(|s| s.to_string())
.as_deref()
.unwrap_or("none")
);
println!(
" reassignment generation: {} (pass running: {})",
reassignment_generation,
if reassignment_pending { "yes" } else { "no" }
);
println!(" recovered generation: {}", recovered_reassignment_generation);
println!(
" recovered at least once successfully: {}",
if first_recovery_complete { "yes" } else { "no" },
);

println!(" sagas running: {}", sagas_pending.len());
for saga in &sagas_pending {
println!(
" saga {} pending since {} ({})",
" saga {} pending since {} ({})",
saga.saga_id,
humantime::format_rfc3339_millis(saga.time_pending.into()),
saga.saga_name
);
}

println!("database connections held: {}", quiesce.db_claims.len());
for claim in &quiesce.db_claims {
println!("database connections held: {}", db_claims.len());
for claim in &db_claims {
println!(
" claim {} held since {} ({} ago)",
claim.id,
claim.held_since,
format_time_delta(Utc::now() - claim.held_since),
);
if args.verbose {
if args.stacks {
println!(" acquired by:");
println!("{}", textwrap::indent(&claim.debug, " "));
}
Expand All @@ -177,7 +228,7 @@ async fn quiesce_start(
_token: DestructiveOperationToken,
) -> Result<(), anyhow::Error> {
client.quiesce_start().await.context("quiescing Nexus")?;
quiesce_show(client, &QuiesceShowArgs { verbose: false }).await
quiesce_show(client, &QuiesceShowArgs { stacks: false }).await
}

fn format_duration_ms(duration: Duration) -> String {
Expand Down
21 changes: 10 additions & 11 deletions nexus/reconfigurator/execution/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use nexus_types::deployment::execution::{
StepHandle, StepResult, UpdateEngine,
};
use nexus_types::quiesce::SagaQuiesceHandle;
use nexus_types::quiesce::SagaReassignmentDone;
use omicron_uuid_kinds::OmicronZoneUuid;
use slog::info;
use slog_error_chain::InlineErrorChain;
Expand Down Expand Up @@ -627,18 +628,16 @@ fn register_reassign_sagas_step<'a>(
match reassigned {
Ok(needs_saga_recovery) => (
StepSuccess::new(needs_saga_recovery).build(),
needs_saga_recovery,
SagaReassignmentDone::ReassignedAllAsOf(
blueprint.id,
needs_saga_recovery,
),
),
Err(error) => (
StepWarning::new(false, error.to_string())
.build(),
SagaReassignmentDone::Indeterminate,
),
Err(error) => {
// It's possible that we failed after having
// re-assigned sagas in the database.
let maybe_reassigned = true;
(
StepWarning::new(false, error.to_string())
.build(),
maybe_reassigned,
)
}
}
})
.await)
Expand Down
12 changes: 7 additions & 5 deletions nexus/src/app/quiesce.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ impl super::Nexus {
) -> LookupResult<QuiesceStatus> {
opctx.authorize(authz::Action::Read, &authz::QUIESCE_STATE).await?;
let state = self.quiesce.state();
let sagas_pending = self.quiesce.sagas().sagas_pending();
let sagas = self.quiesce.sagas().status();
let db_claims = self.datastore().claims_held();
Ok(QuiesceStatus { state, sagas_pending, db_claims })
Ok(QuiesceStatus { state, sagas, db_claims })
}
}

Expand Down Expand Up @@ -281,7 +281,7 @@ mod test {
assert!(duration_total >= duration_draining_db);
assert!(duration_total >= duration_recording_quiesce);
assert!(duration_total <= (after - before).to_std().unwrap());
assert!(status.sagas_pending.is_empty());
assert!(status.sagas.sagas_pending.is_empty());
assert!(status.db_claims.is_empty());
}

Expand Down Expand Up @@ -355,7 +355,9 @@ mod test {
quiesce_status.state,
QuiesceState::DrainingSagas { .. }
);
assert!(quiesce_status.sagas_pending.contains_key(&demo_saga.saga_id));
assert!(
quiesce_status.sagas.sagas_pending.contains_key(&demo_saga.saga_id)
);
// We should see at least one held database claim from the one we took
// above.
assert!(!quiesce_status.db_claims.is_empty());
Expand Down Expand Up @@ -419,7 +421,7 @@ mod test {
if !matches!(rv.state, QuiesceState::DrainingDb { .. }) {
return Err(CondCheckError::<NexusClientError>::NotYet);
}
assert!(rv.sagas_pending.is_empty());
assert!(rv.sagas.sagas_pending.is_empty());
// The database claim we took is still held.
assert!(!rv.db_claims.is_empty());
Ok(())
Expand Down
9 changes: 3 additions & 6 deletions nexus/types/src/internal_api/views.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::inventory::BaseboardId;
use crate::inventory::Caboose;
use crate::inventory::CabooseWhich;
use crate::inventory::Collection;
use crate::quiesce::SagaQuiesceStatus;
use chrono::DateTime;
use chrono::SecondsFormat;
use chrono::Utc;
Expand Down Expand Up @@ -721,12 +722,8 @@ pub struct QuiesceStatus {
/// what stage of quiescing is Nexus at
pub state: QuiesceState,

/// what sagas are currently running or known needing to be recovered
///
/// This should only be non-empty when state is `Running` or
/// `WaitingForSagas`. Entries here prevent transitioning from
/// `WaitingForSagas` to `WaitingForDb`.
pub sagas_pending: IdOrdMap<PendingSagaInfo>,
/// information about saga quiescing
pub sagas: SagaQuiesceStatus,

/// what database claims are currently held (by any part of Nexus)
///
Expand Down
Loading
Loading