Skip to content

Commit 73fff49

Browse files
committed
review feedback
1 parent 7e271d1 commit 73fff49

File tree

3 files changed

+72
-14
lines changed

3 files changed

+72
-14
lines changed

dev-tools/omdb/src/bin/omdb/nexus/quiesce.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use chrono::TimeDelta;
1111
use chrono::Utc;
1212
use clap::Args;
1313
use clap::Subcommand;
14+
use nexus_client::types::PendingRecovery;
1415
use nexus_client::types::QuiesceState;
1516
use nexus_client::types::QuiesceStatus;
1617
use nexus_client::types::SagaQuiesceStatus;
@@ -160,6 +161,7 @@ async fn quiesce_show(
160161
reassignment_pending,
161162
recovered_blueprint_id,
162163
recovered_reassignment_generation,
164+
recovery_pending,
163165
} = sagas;
164166

165167
println!("saga quiesce:");
@@ -172,7 +174,7 @@ async fn quiesce_show(
172174
.unwrap_or("none")
173175
);
174176
println!(
175-
" blueprint for last recovery pass: {}",
177+
" blueprint for last completed recovery pass: {}",
176178
recovered_blueprint_id
177179
.map(|s| s.to_string())
178180
.as_deref()
@@ -195,6 +197,17 @@ async fn quiesce_show(
195197
" recovered at least once successfully: {}",
196198
if first_recovery_complete { "yes" } else { "no" },
197199
);
200+
print!(" recovery pending: ");
201+
if let Some(PendingRecovery { generation, blueprint_id }) = recovery_pending
202+
{
203+
println!(
204+
"yes (generation {}, blueprint id {})",
205+
generation,
206+
blueprint_id.map(|s| s.to_string()).as_deref().unwrap_or("none")
207+
);
208+
} else {
209+
println!("no");
210+
}
198211

199212
println!(" sagas running: {}", sagas_pending.len());
200213
for saga in &sagas_pending {

nexus/types/src/quiesce.rs

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -183,11 +183,18 @@ pub struct SagaQuiesceStatus {
183183
/// there's a new blueprint that expunges a different Nexus zone.
184184
drained_blueprint_id: Option<BlueprintUuid>,
185185

186-
/// whether a saga recovery operation is ongoing, and if one is:
187-
/// - what `reassignment_generation` was when it started
188-
/// - which blueprint id we'll be fully caught up to upon completion
189-
#[serde(skip)] // XXX-dap
190-
recovery_pending: Option<(Generation, Option<BlueprintUuid>)>,
186+
/// If a recovery pass is ongoing, a snapshot of reassignment state when it
187+
/// started (which reflects what we'll be caught up to when it finishes)
188+
recovery_pending: Option<PendingRecovery>,
189+
}
190+
191+
/// Snapshot of reassignment state when a recovery pass started
192+
#[derive(Debug, Clone, Serialize, JsonSchema)]
193+
struct PendingRecovery {
194+
/// what `reassignment_generation` was when this recovery started
195+
generation: Generation,
196+
/// which blueprint id we'd be fully caught up to upon completion
197+
blueprint_id: Option<BlueprintUuid>,
191198
}
192199

193200
impl SagaQuiesceHandle {
@@ -251,7 +258,7 @@ impl SagaQuiesceHandle {
251258
}
252259
};
253260

254-
q.latch_drained_blueprint_id();
261+
q.latch_blueprint_if_drained();
255262
changed
256263
});
257264
}
@@ -423,7 +430,7 @@ impl SagaQuiesceHandle {
423430
}
424431
}
425432

426-
q.latch_drained_blueprint_id();
433+
q.latch_blueprint_if_drained();
427434
});
428435
}
429436

@@ -468,8 +475,10 @@ impl SagaQuiesceHandle {
468475
"recovery_start() called twice without intervening \
469476
recovery_done() (concurrent calls to recover()?)",
470477
);
471-
q.recovery_pending =
472-
Some((q.reassignment_generation, q.reassignment_blueprint_id));
478+
q.recovery_pending = Some(PendingRecovery {
479+
generation: q.reassignment_generation,
480+
blueprint_id: q.reassignment_blueprint_id,
481+
});
473482
});
474483

475484
info!(&self.log, "saga recovery pass starting");
@@ -480,7 +489,8 @@ impl SagaQuiesceHandle {
480489
fn recovery_done(&self, success: bool) {
481490
let log = self.log.clone();
482491
self.inner.send_modify(|q| {
483-
let Some((generation, blueprint_id)) = q.recovery_pending.take()
492+
let Some(PendingRecovery { generation, blueprint_id }) =
493+
q.recovery_pending.take()
484494
else {
485495
panic!("cannot finish saga recovery when it was not running");
486496
};
@@ -495,7 +505,7 @@ impl SagaQuiesceHandle {
495505
q.recovered_blueprint_id = blueprint_id;
496506
q.recovered_reassignment_generation = generation;
497507
q.first_recovery_complete = true;
498-
q.latch_drained_blueprint_id();
508+
q.latch_blueprint_if_drained();
499509
} else {
500510
info!(&log, "saga recovery pass failed");
501511
}
@@ -636,7 +646,7 @@ impl SagaQuiesceStatus {
636646
/// result of that blueprint. The rest of our bookkeeping would reflect
637647
/// that we're not fully drained, which is true, but we still want to be
638648
/// able to report that we were fully drained _as of this blueprint_.
639-
fn latch_drained_blueprint_id(&mut self) {
649+
fn latch_blueprint_if_drained(&mut self) {
640650
if self.is_fully_drained() {
641651
// If we've recovered up through a given blueprint id and are now
642652
// fully drained, then we have definitely fully drained up through
@@ -705,7 +715,7 @@ impl NewlyPendingSagaRef {
705715
q.sagas_pending
706716
.remove(&saga_id)
707717
.expect("saga should have been running");
708-
q.latch_drained_blueprint_id();
718+
q.latch_blueprint_if_drained();
709719
});
710720
rv
711721
});

openapi/nexus-internal.json

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6527,6 +6527,32 @@
65276527
"by_baseboard"
65286528
]
65296529
},
6530+
"PendingRecovery": {
6531+
"description": "Snapshot of reassignment state when a recovery pass started",
6532+
"type": "object",
6533+
"properties": {
6534+
"blueprint_id": {
6535+
"nullable": true,
6536+
"description": "which blueprint id we'd be fully caught up to upon completion",
6537+
"allOf": [
6538+
{
6539+
"$ref": "#/components/schemas/TypedUuidForBlueprintKind"
6540+
}
6541+
]
6542+
},
6543+
"generation": {
6544+
"description": "what `reassignment_generation` was when this recovery started",
6545+
"allOf": [
6546+
{
6547+
"$ref": "#/components/schemas/Generation"
6548+
}
6549+
]
6550+
}
6551+
},
6552+
"required": [
6553+
"generation"
6554+
]
6555+
},
65306556
"PendingSagaInfo": {
65316557
"description": "Describes a pending saga (for debugging why quiesce is stuck)",
65326558
"type": "object",
@@ -8301,6 +8327,15 @@
83018327
}
83028328
]
83038329
},
8330+
"recovery_pending": {
8331+
"nullable": true,
8332+
"description": "If a recovery pass is ongoing, a snapshot of reassignment state when it started (which reflects what we'll be caught up to when it finishes)",
8333+
"allOf": [
8334+
{
8335+
"$ref": "#/components/schemas/PendingRecovery"
8336+
}
8337+
]
8338+
},
83048339
"sagas_pending": {
83058340
"title": "IdOrdMap",
83068341
"description": "list of sagas we need to wait to complete before quiescing\n\nThese are basically running sagas. They may have been created in this Nexus process lifetime or created in another process and then recovered in this one.",

0 commit comments

Comments
 (0)