Skip to content

Commit a1c52ba

Browse files
committed
update omdb
1 parent 1ecea95 commit a1c52ba

File tree

5 files changed

+217
-48
lines changed

5 files changed

+217
-48
lines changed

dev-tools/omdb/src/bin/omdb/nexus/quiesce.rs

Lines changed: 60 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ use chrono::Utc;
1212
use clap::Args;
1313
use clap::Subcommand;
1414
use nexus_client::types::QuiesceState;
15+
use nexus_client::types::QuiesceStatus;
16+
use nexus_client::types::SagaQuiesceStatus;
1517
use std::time::Duration;
1618

1719
#[derive(Debug, Args)]
@@ -31,9 +33,9 @@ pub enum QuiesceCommands {
3133

3234
#[derive(Debug, Args)]
3335
pub struct QuiesceShowArgs {
34-
/// Show details about held database connections
36+
/// Show stack traces for held database connections
3537
#[clap(short, long, default_value_t = false)]
36-
verbose: bool,
38+
stacks: bool,
3739
}
3840

3941
pub async fn cmd_nexus_quiesce(
@@ -60,7 +62,10 @@ async fn quiesce_show(
6062
.await
6163
.context("fetching quiesce state")?
6264
.into_inner();
63-
match quiesce.state {
65+
66+
let QuiesceStatus { db_claims, sagas, state } = quiesce;
67+
68+
match state {
6469
QuiesceState::Undetermined => {
6570
println!("has not yet determined if it is quiescing");
6671
}
@@ -145,25 +150,70 @@ async fn quiesce_show(
145150
}
146151
}
147152

148-
println!("sagas running: {}", quiesce.sagas_pending.len());
149-
for saga in &quiesce.sagas_pending {
153+
let SagaQuiesceStatus {
154+
sagas_pending,
155+
drained_blueprint_id,
156+
first_recovery_complete,
157+
new_sagas_allowed,
158+
reassignment_blueprint_id,
159+
reassignment_generation,
160+
reassignment_pending,
161+
recovered_blueprint_id,
162+
recovered_reassignment_generation,
163+
} = sagas;
164+
165+
println!("saga quiesce: {:?}", new_sagas_allowed);
166+
println!(
167+
"drained as of blueprint: {}",
168+
drained_blueprint_id
169+
.map(|s| s.to_string())
170+
.as_deref()
171+
.unwrap_or("none")
172+
);
173+
println!(
174+
" blueprint for last recovery pass: {}",
175+
recovered_blueprint_id
176+
.map(|s| s.to_string())
177+
.as_deref()
178+
.unwrap_or("none")
179+
);
180+
println!(
181+
" blueprint for last reassignment pass: {}",
182+
reassignment_blueprint_id
183+
.map(|s| s.to_string())
184+
.as_deref()
185+
.unwrap_or("none")
186+
);
187+
println!(
188+
" reassignment generation: {} (pass running: {})",
189+
reassignment_generation,
190+
if reassignment_pending { "yes" } else { "no " }
191+
);
192+
println!(" recovered generation: {}", recovered_reassignment_generation);
193+
println!(
194+
" recovered at least once successfully: {}",
195+
if first_recovery_complete { "yes" } else { "no" },
196+
);
197+
198+
println!(" sagas running: {}", sagas_pending.len());
199+
for saga in &sagas_pending {
150200
println!(
151-
" saga {} pending since {} ({})",
201+
" saga {} pending since {} ({})",
152202
saga.saga_id,
153203
humantime::format_rfc3339_millis(saga.time_pending.into()),
154204
saga.saga_name
155205
);
156206
}
157207

158-
println!("database connections held: {}", quiesce.db_claims.len());
159-
for claim in &quiesce.db_claims {
208+
println!("database connections held: {}", db_claims.len());
209+
for claim in &db_claims {
160210
println!(
161211
" claim {} held since {} ({} ago)",
162212
claim.id,
163213
claim.held_since,
164214
format_time_delta(Utc::now() - claim.held_since),
165215
);
166-
if args.verbose {
216+
if args.stacks {
167217
println!(" acquired by:");
168218
println!("{}", textwrap::indent(&claim.debug, " "));
169219
}
@@ -177,7 +227,7 @@ async fn quiesce_start(
177227
_token: DestructiveOperationToken,
178228
) -> Result<(), anyhow::Error> {
179229
client.quiesce_start().await.context("quiescing Nexus")?;
180-
quiesce_show(client, &QuiesceShowArgs { verbose: false }).await
230+
quiesce_show(client, &QuiesceShowArgs { stacks: false }).await
181231
}
182232

183233
fn format_duration_ms(duration: Duration) -> String {

nexus/src/app/quiesce.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ impl super::Nexus {
3232
) -> LookupResult<QuiesceStatus> {
3333
opctx.authorize(authz::Action::Read, &authz::QUIESCE_STATE).await?;
3434
let state = self.quiesce.state();
35-
let sagas_pending = self.quiesce.sagas().sagas_pending();
35+
let sagas = self.quiesce.sagas().status();
3636
let db_claims = self.datastore().claims_held();
37-
Ok(QuiesceStatus { state, sagas_pending, db_claims })
37+
Ok(QuiesceStatus { state, sagas, db_claims })
3838
}
3939
}
4040

@@ -281,7 +281,7 @@ mod test {
281281
assert!(duration_total >= duration_draining_db);
282282
assert!(duration_total >= duration_recording_quiesce);
283283
assert!(duration_total <= (after - before).to_std().unwrap());
284-
assert!(status.sagas_pending.is_empty());
284+
assert!(status.sagas.sagas_pending.is_empty());
285285
assert!(status.db_claims.is_empty());
286286
}
287287

@@ -355,7 +355,9 @@ mod test {
355355
quiesce_status.state,
356356
QuiesceState::DrainingSagas { .. }
357357
);
358-
assert!(quiesce_status.sagas_pending.contains_key(&demo_saga.saga_id));
358+
assert!(
359+
quiesce_status.sagas.sagas_pending.contains_key(&demo_saga.saga_id)
360+
);
359361
// We should see at least one held database claim: the one we took
360362
// above.
361363
assert!(!quiesce_status.db_claims.is_empty());
@@ -419,7 +421,7 @@ mod test {
419421
if !matches!(rv.state, QuiesceState::DrainingDb { .. }) {
420422
return Err(CondCheckError::<NexusClientError>::NotYet);
421423
}
422-
assert!(rv.sagas_pending.is_empty());
424+
assert!(rv.sagas.sagas_pending.is_empty());
423425
// The database claim we took is still held.
424426
assert!(!rv.db_claims.is_empty());
425427
Ok(())

nexus/types/src/internal_api/views.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use crate::deployment::TargetReleaseDescription;
77
use crate::inventory::BaseboardId;
88
use crate::inventory::CabooseWhich;
99
use crate::inventory::Collection;
10+
use crate::quiesce::SagaQuiesceStatus;
1011
use chrono::DateTime;
1112
use chrono::SecondsFormat;
1213
use chrono::Utc;
@@ -978,12 +979,8 @@ pub struct QuiesceStatus {
978979
/// what stage of quiescing is Nexus at
979980
pub state: QuiesceState,
980981

981-
/// what sagas are currently running or known needing to be recovered
982-
///
983-
/// This should only be non-empty when state is `Running` or
984-
/// `WaitingForSagas`. Entries here prevent transitioning from
985-
/// `WaitingForSagas` to `WaitingForDb`.
986-
pub sagas_pending: IdOrdMap<PendingSagaInfo>,
982+
/// information about saga quiescing
983+
pub sagas: SagaQuiesceStatus,
987984

988985
/// what database claims are currently held (by any part of Nexus)
989986
///

nexus/types/src/quiesce.rs

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ use iddqd::IdOrdMap;
1212
use omicron_common::api::external::Error;
1313
use omicron_common::api::external::Generation;
1414
use omicron_uuid_kinds::BlueprintUuid;
15+
use schemars::JsonSchema;
16+
use serde::Serialize;
1517
use slog::Logger;
1618
use slog::error;
1719
use slog::info;
@@ -25,7 +27,8 @@ use tokio::sync::watch;
2527
///
2628
/// This is used by Nexus quiesce to disallow creation of new sagas when we're
2729
/// trying to quiesce Nexus.
28-
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
30+
#[derive(Debug, Clone, Copy, Eq, PartialEq, JsonSchema, Serialize)]
31+
#[serde(rename_all = "snake_case")]
2932
enum SagasAllowed {
3033
/// New sagas may be started (normal condition)
3134
Allowed,
@@ -101,11 +104,11 @@ pub struct SagaQuiesceHandle {
101104
// cancellation behavior is abysmal), but we don't want to block on a
102105
// std `Condvar` in an async thread. There are options here (e.g.,
103106
// `block_on`), but they're not pleasant.
104-
inner: watch::Sender<SagaQuiesceInner>,
107+
inner: watch::Sender<SagaQuiesceStatus>,
105108
}
106109

107-
#[derive(Debug, Clone)]
108-
struct SagaQuiesceInner {
110+
#[derive(Debug, Clone, Serialize, JsonSchema)]
111+
pub struct SagaQuiesceStatus {
109112
/// current policy: are we allowed to *create* new sagas?
110113
///
111114
/// This also affects re-assigning sagas from expunged Nexus instances to
@@ -183,12 +186,13 @@ struct SagaQuiesceInner {
183186
/// whether a saga recovery operation is ongoing, and if one is:
184187
/// - what `reassignment_generation` was when it started
185188
/// - which blueprint id we'll be fully caught up to upon completion
189+
#[serde(skip)] // XXX-dap
186190
recovery_pending: Option<(Generation, Option<BlueprintUuid>)>,
187191
}
188192

189193
impl SagaQuiesceHandle {
190194
pub fn new(log: Logger) -> SagaQuiesceHandle {
191-
let (inner, _) = watch::channel(SagaQuiesceInner {
195+
let (inner, _) = watch::channel(SagaQuiesceStatus {
192196
new_sagas_allowed: SagasAllowed::DisallowedUnknown,
193197
sagas_pending: IdOrdMap::new(),
194198
first_recovery_complete: false,
@@ -253,6 +257,10 @@ impl SagaQuiesceHandle {
253257
}
254258

255259
/// Returns the blueprint id as of which sagas are fully drained
260+
///
261+
/// We may become un-drained if another re-assignment pass starts for a
262+
/// subsequent blueprint, but it will remain true that we *were*
263+
/// fully drained as of expungements included up through this blueprint.
256264
pub fn fully_drained_blueprint(&self) -> Option<BlueprintUuid> {
257265
self.inner.borrow().drained_blueprint_id
258266
}
@@ -286,8 +294,14 @@ impl SagaQuiesceHandle {
286294
.await;
287295
}
288296

297+
/// Returns a summary of internal state for debugging (involves a clone)
298+
pub fn status(&self) -> SagaQuiesceStatus {
299+
self.inner.borrow().clone()
300+
}
301+
289302
/// Returns information about running sagas (involves a clone)
290-
pub fn sagas_pending(&self) -> IdOrdMap<PendingSagaInfo> {
303+
#[cfg(test)]
304+
fn sagas_pending(&self) -> IdOrdMap<PendingSagaInfo> {
291305
self.inner.borrow().sagas_pending.clone()
292306
}
293307

@@ -593,7 +607,7 @@ impl SagaQuiesceHandle {
593607
}
594608
}
595609

596-
impl SagaQuiesceInner {
610+
impl SagaQuiesceStatus {
597611
/// Returns whether sagas are fully drained
598612
///
599613
/// This condition is not permanent. New sagas can be re-assigned to this
@@ -647,7 +661,7 @@ impl SagaQuiesceInner {
647661
#[must_use = "must record the saga completion future once the saga is running"]
648662
pub struct NewlyPendingSagaRef {
649663
log: Logger,
650-
quiesce: watch::Sender<SagaQuiesceInner>,
664+
quiesce: watch::Sender<SagaQuiesceStatus>,
651665
saga_id: steno::SagaId,
652666
init_finished: bool,
653667
}

0 commit comments

Comments
 (0)