Skip to content

Commit 362c4f7

Browse files
authored
Nexus BG tasks: Use loaded inventory instead of collected inventory (PR 2/2) (#9149)
(This is the second half of #9148.) As of this PR, all (I believe!) the background tasks that want to act on the latest inventory collection now act on the latest inventory collection loaded by the `inventory_loader` bg task. That task runs frequently. Fixes #8882. It's still possible two Nexuses could duel if one is acting on a newer inventory collection, but that's always true. The window for that should now be ~15 seconds, instead of waiting until the next time _our own Nexus_ collects inventory. I did not change other places in Nexus where we're reading inventory outside of background tasks. I think it would probably make sense to audit for those and make them read from this watch channel too? If that sounds right I can file an issue.
1 parent 555af71 commit 362c4f7

File tree

6 files changed

+118
-137
lines changed

6 files changed

+118
-137
lines changed

nexus/src/app/background/init.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -512,13 +512,14 @@ impl BackgroundTasksInitializer {
512512
datastore.clone(),
513513
self.inventory_load_tx,
514514
);
515+
let inventory_load_watcher = inventory_loader.watcher();
515516
driver.register(TaskDefinition {
516517
name: "inventory_loader",
517518
description: "loads the latest inventory collection from the DB",
518519
period: config.inventory.period_secs_load,
519520
task_impl: Box::new(inventory_loader),
520521
opctx: opctx.child(BTreeMap::new()),
521-
watchers: vec![Box::new(inventory_collect_watcher.clone())],
522+
watchers: vec![Box::new(inventory_collect_watcher)],
522523
activator: task_inventory_loader,
523524
});
524525

@@ -544,7 +545,7 @@ impl BackgroundTasksInitializer {
544545
let blueprint_planner = blueprint_planner::BlueprintPlanner::new(
545546
datastore.clone(),
546547
reconfigurator_config_watcher.clone(),
547-
inventory_collect_watcher.clone(),
548+
inventory_load_watcher.clone(),
548549
rx_blueprint.clone(),
549550
);
550551
let rx_planner = blueprint_planner.watcher();
@@ -555,7 +556,7 @@ impl BackgroundTasksInitializer {
555556
task_impl: Box::new(blueprint_planner),
556557
opctx: opctx.child(BTreeMap::new()),
557558
watchers: vec![
558-
Box::new(inventory_collect_watcher.clone()),
559+
Box::new(inventory_load_watcher.clone()),
559560
Box::new(rx_blueprint.clone()),
560561
Box::new(reconfigurator_config_watcher),
561562
],
@@ -620,13 +621,13 @@ impl BackgroundTasksInitializer {
620621
task_impl: Box::new(
621622
physical_disk_adoption::PhysicalDiskAdoption::new(
622623
datastore.clone(),
623-
inventory_collect_watcher.clone(),
624+
inventory_load_watcher.clone(),
624625
config.physical_disk_adoption.disable,
625626
rack_id,
626627
),
627628
),
628629
opctx: opctx.child(BTreeMap::new()),
629-
watchers: vec![Box::new(inventory_collect_watcher.clone())],
630+
watchers: vec![Box::new(inventory_load_watcher.clone())],
630631
activator: task_physical_disk_adoption,
631632
});
632633

@@ -641,10 +642,11 @@ impl BackgroundTasksInitializer {
641642
blueprint_rendezvous::BlueprintRendezvous::new(
642643
datastore.clone(),
643644
rx_blueprint.clone(),
645+
inventory_load_watcher.clone(),
644646
),
645647
),
646648
opctx: opctx.child(BTreeMap::new()),
647-
watchers: vec![Box::new(inventory_collect_watcher.clone())],
649+
watchers: vec![Box::new(inventory_load_watcher.clone())],
648650
activator: task_blueprint_rendezvous,
649651
});
650652

@@ -674,6 +676,7 @@ impl BackgroundTasksInitializer {
674676
task_impl: Box::new(ServiceZoneNatTracker::new(
675677
datastore.clone(),
676678
resolver.clone(),
679+
inventory_load_watcher.clone(),
677680
)),
678681
opctx: opctx.child(BTreeMap::new()),
679682
watchers: vec![],

nexus/src/app/background/tasks/blueprint_planner.rs

Lines changed: 23 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ use nexus_types::deployment::BlueprintSource;
1919
use nexus_types::deployment::PlanningReport;
2020
use nexus_types::deployment::{Blueprint, BlueprintTarget};
2121
use nexus_types::internal_api::background::BlueprintPlannerStatus;
22+
use nexus_types::inventory::Collection;
2223
use omicron_common::api::external::LookupType;
23-
use omicron_uuid_kinds::CollectionUuid;
2424
use omicron_uuid_kinds::GenericUuid as _;
2525
use serde_json::json;
2626
use slog_error_chain::InlineErrorChain;
@@ -31,7 +31,7 @@ use tokio::sync::watch::{self, Receiver, Sender};
3131
pub struct BlueprintPlanner {
3232
datastore: Arc<DataStore>,
3333
rx_config: Receiver<ReconfiguratorConfigLoaderState>,
34-
rx_inventory: Receiver<Option<CollectionUuid>>,
34+
rx_inventory: Receiver<Option<Arc<Collection>>>,
3535
rx_blueprint: Receiver<Option<Arc<(BlueprintTarget, Blueprint)>>>,
3636
tx_blueprint: Sender<Option<Arc<(BlueprintTarget, Blueprint)>>>,
3737
blueprint_limit: u64,
@@ -57,7 +57,7 @@ impl BlueprintPlanner {
5757
pub fn new(
5858
datastore: Arc<DataStore>,
5959
rx_config: Receiver<ReconfiguratorConfigLoaderState>,
60-
rx_inventory: Receiver<Option<CollectionUuid>>,
60+
rx_inventory: Receiver<Option<Arc<Collection>>>,
6161
rx_blueprint: Receiver<Option<Arc<(BlueprintTarget, Blueprint)>>>,
6262
) -> Self {
6363
let (tx_blueprint, _) = watch::channel(None);
@@ -114,10 +114,12 @@ impl BlueprintPlanner {
114114
let (target, parent) = &*loaded;
115115
let parent_blueprint_id = parent.id;
116116

117-
// Get the inventory most recently seen by the collection
118-
// background task. The value is `Copy`, so with the deref
119-
// we don't block the channel.
120-
let Some(collection_id) = *self.rx_inventory.borrow_and_update() else {
117+
// Get the inventory most recently seen by the inventory loader
118+
// background task. We clone the Arc to avoid keeping the channel locked
119+
// for the rest of our execution.
120+
let Some(collection) =
121+
self.rx_inventory.borrow_and_update().as_ref().map(Arc::clone)
122+
else {
121123
warn!(
122124
&opctx.log,
123125
"blueprint planning skipped";
@@ -127,25 +129,6 @@ impl BlueprintPlanner {
127129
"no inventory collection available",
128130
));
129131
};
130-
let collection = match self
131-
.datastore
132-
.inventory_collection_read(opctx, collection_id)
133-
.await
134-
{
135-
Ok(collection) => collection,
136-
Err(error) => {
137-
error!(
138-
&opctx.log,
139-
"can't read inventory collection";
140-
"collection_id" => %collection_id,
141-
"error" => %error,
142-
);
143-
return BlueprintPlannerStatus::Error(format!(
144-
"can't read inventory collection {}: {}",
145-
collection_id, error
146-
));
147-
}
148-
};
149132

150133
// Assemble the planning context.
151134
let input = match PlanningInputFromDb::assemble(
@@ -438,6 +421,7 @@ mod test {
438421
use crate::app::background::tasks::blueprint_execution::BlueprintExecutor;
439422
use crate::app::background::tasks::blueprint_load::TargetBlueprintLoader;
440423
use crate::app::background::tasks::inventory_collection::InventoryCollector;
424+
use crate::app::background::tasks::inventory_load::InventoryLoader;
441425
use crate::app::{background::Activator, quiesce::NexusQuiesceHandle};
442426
use assert_matches::assert_matches;
443427
use nexus_inventory::now_db_precision;
@@ -467,10 +451,10 @@ mod test {
467451

468452
// Spin up the blueprint loader background task.
469453
let (tx_loader, _) = watch::channel(None);
470-
let mut loader =
454+
let mut bp_loader =
471455
TargetBlueprintLoader::new(datastore.clone(), tx_loader);
472-
let mut rx_loader = loader.watcher();
473-
loader.activate(&opctx).await;
456+
let mut rx_loader = bp_loader.watcher();
457+
bp_loader.activate(&opctx).await;
474458
let (_initial_target, initial_blueprint) = &*rx_loader
475459
.borrow_and_update()
476460
.clone()
@@ -490,9 +474,14 @@ mod test {
490474
1,
491475
false,
492476
);
493-
let rx_collector = collector.watcher();
494477
collector.activate(&opctx).await;
495478

479+
// Spin up the inventory loader background task.
480+
let mut inv_loader =
481+
InventoryLoader::new(datastore.clone(), watch::Sender::new(None));
482+
let rx_inventory = inv_loader.watcher();
483+
inv_loader.activate(&opctx).await;
484+
496485
// Enable the planner
497486
let (_tx, rx_config_loader) = watch::channel(
498487
ReconfiguratorConfigLoaderState::Loaded(ReconfiguratorConfigView {
@@ -509,10 +498,9 @@ mod test {
509498
let mut planner = BlueprintPlanner::new(
510499
datastore.clone(),
511500
rx_config_loader,
512-
rx_collector,
501+
rx_inventory,
513502
rx_loader.clone(),
514503
);
515-
let _rx_planner = planner.watcher();
516504

517505
// On activation, the planner should run successfully and generate
518506
// a new target blueprint.
@@ -536,7 +524,7 @@ mod test {
536524
};
537525

538526
// Load and check the new target blueprint.
539-
loader.activate(&opctx).await;
527+
bp_loader.activate(&opctx).await;
540528
let (target, blueprint) = &*rx_loader
541529
.borrow_and_update()
542530
.clone()
@@ -571,7 +559,7 @@ mod test {
571559
.expect("can't enable execution");
572560

573561
// Ping the loader again so it gets the updated target.
574-
loader.activate(&opctx).await;
562+
bp_loader.activate(&opctx).await;
575563
let (target, blueprint) = &*rx_loader
576564
.borrow_and_update()
577565
.clone()
@@ -584,6 +572,7 @@ mod test {
584572

585573
// Trigger an inventory collection.
586574
collector.activate(&opctx).await;
575+
inv_loader.activate(&opctx).await;
587576

588577
// Execute the plan.
589578
let (dummy_tx, _dummy_rx) = watch::channel(PendingMgsUpdates::new());

nexus/src/app/background/tasks/blueprint_rendezvous.rs

Lines changed: 10 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ use nexus_reconfigurator_rendezvous::reconcile_blueprint_rendezvous_tables;
1414
use nexus_types::{
1515
deployment::{Blueprint, BlueprintTarget},
1616
internal_api::background::BlueprintRendezvousStatus,
17+
inventory::Collection,
1718
};
1819
use serde_json::json;
19-
use slog_error_chain::InlineErrorChain;
2020
use std::sync::Arc;
2121
use tokio::sync::watch;
2222

@@ -26,6 +26,7 @@ use tokio::sync::watch;
2626
pub struct BlueprintRendezvous {
2727
datastore: Arc<DataStore>,
2828
rx_blueprint: watch::Receiver<Option<Arc<(BlueprintTarget, Blueprint)>>>,
29+
rx_inventory: watch::Receiver<Option<Arc<Collection>>>,
2930
}
3031

3132
impl BlueprintRendezvous {
@@ -34,8 +35,9 @@ impl BlueprintRendezvous {
3435
rx_blueprint: watch::Receiver<
3536
Option<Arc<(BlueprintTarget, Blueprint)>>,
3637
>,
38+
rx_inventory: watch::Receiver<Option<Arc<Collection>>>,
3739
) -> Self {
38-
Self { datastore, rx_blueprint }
40+
Self { datastore, rx_blueprint, rx_inventory }
3941
}
4042

4143
/// Implementation for `BackgroundTask::activate` for `BlueprintRendezvous`,
@@ -56,27 +58,12 @@ impl BlueprintRendezvous {
5658
return json!({"error": "no blueprint" });
5759
};
5860

59-
// Get the latest inventory collection
60-
let maybe_collection = match self
61-
.datastore
62-
.inventory_get_latest_collection(opctx)
63-
.await
64-
{
65-
Ok(maybe_collection) => maybe_collection,
66-
Err(err) => {
67-
let err = InlineErrorChain::new(&err);
68-
warn!(
69-
&opctx.log, "Blueprint rendezvous: skipped";
70-
"reason" => "failed to read latest inventory collection",
71-
&err,
72-
);
73-
return json!({ "error":
74-
format!("failed reading inventory collection: {err}"),
75-
});
76-
}
77-
};
78-
79-
let Some(collection) = maybe_collection else {
61+
// Get the inventory most recently seen by the inventory loader
62+
// background task. We clone the Arc to avoid keeping the channel locked
63+
// for the rest of our execution.
64+
let Some(collection) =
65+
self.rx_inventory.borrow_and_update().as_ref().map(Arc::clone)
66+
else {
8067
warn!(
8168
&opctx.log, "Blueprint rendezvous: skipped";
8269
"reason" => "no inventory collection",

nexus/src/app/background/tasks/inventory_load.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ impl InventoryLoader {
5454
Self { datastore, tx }
5555
}
5656

57+
pub fn watcher(&self) -> watch::Receiver<Option<Arc<Collection>>> {
58+
self.tx.subscribe()
59+
}
60+
5761
async fn load_if_needed(&self, opctx: &OpContext) -> InventoryLoadStatus {
5862
// Set up a logger for this activation that includes metadata about
5963
// the current target.

0 commit comments

Comments
 (0)