Skip to content

Commit 0cdfd08

Browse files
authored
[reconfigurator] Add RPW to reconcile debug dataset rendezvous table (#7342)
This is PR 2 of 2 and builds on #7341; it adds an RPW that calls the library added in that PR to actually reconcile blueprint+inventory and update the debug dataset rendezvous table, and changes the support bundle query that picks a debug dataset to use this new table.
1 parent bb7285c commit 0cdfd08

File tree

17 files changed

+256
-36
lines changed

17 files changed

+256
-36
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dev-tools/omdb/tests/env.out

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ task: "blueprint_loader"
4343
Loads the current target blueprint from the DB
4444

4545

46+
task: "blueprint_rendezvous"
47+
reconciles blueprints and inventory collection, updating Reconfigurator-
48+
owned rendezvous tables that other subsystems consume
49+
50+
4651
task: "crdb_node_id_collector"
4752
Collects node IDs of running CockroachDB zones
4853

@@ -218,6 +223,11 @@ task: "blueprint_loader"
218223
Loads the current target blueprint from the DB
219224

220225

226+
task: "blueprint_rendezvous"
227+
reconciles blueprints and inventory collection, updating Reconfigurator-
228+
owned rendezvous tables that other subsystems consume
229+
230+
221231
task: "crdb_node_id_collector"
222232
Collects node IDs of running CockroachDB zones
223233

@@ -380,6 +390,11 @@ task: "blueprint_loader"
380390
Loads the current target blueprint from the DB
381391

382392

393+
task: "blueprint_rendezvous"
394+
reconciles blueprints and inventory collection, updating Reconfigurator-
395+
owned rendezvous tables that other subsystems consume
396+
397+
383398
task: "crdb_node_id_collector"
384399
Collects node IDs of running CockroachDB zones
385400

dev-tools/omdb/tests/successes.out

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,11 @@ task: "blueprint_loader"
262262
Loads the current target blueprint from the DB
263263

264264

265+
task: "blueprint_rendezvous"
266+
reconciles blueprints and inventory collection, updating Reconfigurator-
267+
owned rendezvous tables that other subsystems consume
268+
269+
265270
task: "crdb_node_id_collector"
266271
Collects node IDs of running CockroachDB zones
267272

@@ -499,6 +504,13 @@ task: "bfd_manager"
499504
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
500505
last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }
501506

507+
task: "blueprint_rendezvous"
508+
configured period: every <REDACTED_DURATION>m
509+
currently executing: no
510+
last completed activation: <REDACTED ITERATIONS>, triggered by a dependent task completing
511+
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
512+
last completion reported error: no blueprint
513+
502514
task: "crdb_node_id_collector"
503515
configured period: every <REDACTED_DURATION>m
504516
currently executing: no
@@ -951,6 +963,13 @@ task: "bfd_manager"
951963
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
952964
last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }
953965

966+
task: "blueprint_rendezvous"
967+
configured period: every <REDACTED_DURATION>m
968+
currently executing: no
969+
last completed activation: <REDACTED ITERATIONS>, triggered by a dependent task completing
970+
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
971+
last completion reported error: no blueprint
972+
954973
task: "crdb_node_id_collector"
955974
configured period: every <REDACTED_DURATION>m
956975
currently executing: no

nexus-config/src/nexus_config.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,12 @@ pub struct BlueprintTasksConfig {
580580
#[serde_as(as = "DurationSeconds<u64>")]
581581
pub period_secs_execute: Duration,
582582

583+
/// period (in seconds) for periodic activations of the background task that
584+
/// reconciles the latest blueprint and latest inventory collection into
585+
/// Reconfigurator rendezvous tables
586+
#[serde_as(as = "DurationSeconds<u64>")]
587+
pub period_secs_rendezvous: Duration,
588+
583589
/// period (in seconds) for periodic activations of the background task that
584590
/// collects the node IDs of CockroachDB zones
585591
#[serde_as(as = "DurationSeconds<u64>")]
@@ -953,6 +959,7 @@ mod test {
953959
phantom_disks.period_secs = 30
954960
blueprints.period_secs_load = 10
955961
blueprints.period_secs_execute = 60
962+
blueprints.period_secs_rendezvous = 300
956963
blueprints.period_secs_collect_crdb_node_ids = 180
957964
sync_service_zone_nat.period_secs = 30
958965
switch_port_settings_manager.period_secs = 30
@@ -1108,6 +1115,7 @@ mod test {
11081115
period_secs_execute: Duration::from_secs(60),
11091116
period_secs_collect_crdb_node_ids:
11101117
Duration::from_secs(180),
1118+
period_secs_rendezvous: Duration::from_secs(300),
11111119
},
11121120
sync_service_zone_nat: SyncServiceZoneNatConfig {
11131121
period_secs: Duration::from_secs(30)
@@ -1231,6 +1239,7 @@ mod test {
12311239
phantom_disks.period_secs = 30
12321240
blueprints.period_secs_load = 10
12331241
blueprints.period_secs_execute = 60
1242+
blueprints.period_secs_rendezvous = 300
12341243
blueprints.period_secs_collect_crdb_node_ids = 180
12351244
sync_service_zone_nat.period_secs = 30
12361245
switch_port_settings_manager.period_secs = 30

nexus/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ nexus-metrics-producer-gc.workspace = true
109109
nexus-reconfigurator-execution.workspace = true
110110
nexus-reconfigurator-planning.workspace = true
111111
nexus-reconfigurator-preparation.workspace = true
112+
nexus-reconfigurator-rendezvous.workspace = true
112113
nexus-sled-agent-shared.workspace = true
113114
nexus-types.workspace = true
114115
omicron-common.workspace = true

nexus/db-model/src/schema.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,6 +2032,7 @@ allow_tables_to_appear_in_same_query!(hw_baseboard_id, inv_sled_agent,);
20322032
allow_tables_to_appear_in_same_query!(
20332033
bp_omicron_zone,
20342034
bp_target,
2035+
rendezvous_debug_dataset,
20352036
dataset,
20362037
disk,
20372038
image,

nexus/db-queries/src/db/datastore/support_bundle.rs

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ use crate::db;
1111
use crate::db::error::public_error_from_diesel;
1212
use crate::db::error::ErrorHandler;
1313
use crate::db::lookup::LookupPath;
14-
use crate::db::model::Dataset;
15-
use crate::db::model::DatasetKind;
14+
use crate::db::model::RendezvousDebugDataset;
1615
use crate::db::model::SupportBundle;
1716
use crate::db::model::SupportBundleState;
1817
use crate::db::pagination::paginated;
@@ -21,7 +20,6 @@ use crate::transaction_retry::OptionalError;
2120
use async_bb8_diesel::AsyncRunQueryDsl;
2221
use diesel::prelude::*;
2322
use futures::FutureExt;
24-
use nexus_types::identity::Asset;
2523
use omicron_common::api::external;
2624
use omicron_common::api::external::CreateResult;
2725
use omicron_common::api::external::DataPageParams;
@@ -31,7 +29,6 @@ use omicron_common::api::external::LookupResult;
3129
use omicron_uuid_kinds::GenericUuid;
3230
use omicron_uuid_kinds::OmicronZoneUuid;
3331
use omicron_uuid_kinds::SupportBundleUuid;
34-
use omicron_uuid_kinds::ZpoolUuid;
3532
use uuid::Uuid;
3633

3734
const CANNOT_ALLOCATE_ERR_MSG: &'static str =
@@ -93,21 +90,20 @@ impl DataStore {
9390
let err = err.clone();
9491

9592
async move {
96-
use db::schema::dataset::dsl as dataset_dsl;
93+
use db::schema::rendezvous_debug_dataset::dsl as dataset_dsl;
9794
use db::schema::support_bundle::dsl as support_bundle_dsl;
9895

9996
// Observe all non-tombstoned debug datasets.
10097
//
10198
// Return the first one we find that doesn't already
10299
// have a support bundle allocated to it.
103-
let free_dataset = dataset_dsl::dataset
104-
.filter(dataset_dsl::time_deleted.is_null())
105-
.filter(dataset_dsl::kind.eq(DatasetKind::Debug))
100+
let free_dataset = dataset_dsl::rendezvous_debug_dataset
101+
.filter(dataset_dsl::time_tombstoned.is_null())
106102
.left_join(support_bundle_dsl::support_bundle.on(
107103
dataset_dsl::id.eq(support_bundle_dsl::dataset_id),
108104
))
109105
.filter(support_bundle_dsl::dataset_id.is_null())
110-
.select(Dataset::as_select())
106+
.select(RendezvousDebugDataset::as_select())
111107
.first_async(&conn)
112108
.await
113109
.optional()?;
@@ -129,7 +125,7 @@ impl DataStore {
129125

130126
let bundle = SupportBundle::new(
131127
reason_for_creation,
132-
ZpoolUuid::from_untyped_uuid(dataset.pool_id),
128+
dataset.pool_id(),
133129
dataset.id(),
134130
this_nexus_id,
135131
);
@@ -499,6 +495,7 @@ mod test {
499495
use omicron_uuid_kinds::DatasetUuid;
500496
use omicron_uuid_kinds::PhysicalDiskUuid;
501497
use omicron_uuid_kinds::SledUuid;
498+
use omicron_uuid_kinds::ZpoolUuid;
502499
use rand::Rng;
503500

504501
fn authz_support_bundle_from_id(
@@ -569,6 +566,7 @@ mod test {
569566
opctx: &OpContext,
570567
) {
571568
let rack_id = Uuid::new_v4();
569+
let blueprint_id = BlueprintUuid::new_v4();
572570
let sled = SledUpdate::new(
573571
*self.sled.as_untyped_uuid(),
574572
"[::1]:0".parse().unwrap(),
@@ -601,18 +599,17 @@ mod test {
601599
datastore
602600
.zpool_insert(opctx, zpool)
603601
.await
604-
.expect("failed to upsert zpool");
602+
.expect("inserted zpool");
605603

606-
let dataset = Dataset::new(
604+
let dataset = RendezvousDebugDataset::new(
607605
pool.dataset,
608-
pool.pool.into_untyped_uuid(),
609-
None,
610-
DebugDatasetKind,
606+
pool.pool,
607+
blueprint_id,
611608
);
612609
datastore
613-
.dataset_upsert(dataset)
610+
.debug_dataset_insert_if_not_exists(opctx, dataset)
614611
.await
615-
.expect("failed to upsert dataset");
612+
.expect("inserted debug dataset");
616613
}
617614
}
618615
}

nexus/examples/config-second.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ support_bundle_collector.period_secs = 30
126126
decommissioned_disk_cleaner.period_secs = 60
127127
blueprints.period_secs_load = 10
128128
blueprints.period_secs_execute = 60
129+
blueprints.period_secs_rendezvous = 300
129130
blueprints.period_secs_collect_crdb_node_ids = 180
130131
sync_service_zone_nat.period_secs = 30
131132
switch_port_settings_manager.period_secs = 30

nexus/examples/config.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ support_bundle_collector.period_secs = 30
112112
decommissioned_disk_cleaner.period_secs = 60
113113
blueprints.period_secs_load = 10
114114
blueprints.period_secs_execute = 60
115+
blueprints.period_secs_rendezvous = 300
115116
blueprints.period_secs_collect_crdb_node_ids = 180
116117
sync_service_zone_nat.period_secs = 30
117118
switch_port_settings_manager.period_secs = 30

nexus/src/app/background/init.rs

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ use super::tasks::abandoned_vmm_reaper;
9292
use super::tasks::bfd;
9393
use super::tasks::blueprint_execution;
9494
use super::tasks::blueprint_load;
95+
use super::tasks::blueprint_rendezvous;
9596
use super::tasks::crdb_node_id_collector;
9697
use super::tasks::decommissioned_disk_cleaner;
9798
use super::tasks::dns_config;
@@ -156,6 +157,7 @@ pub struct BackgroundTasks {
156157
pub task_phantom_disks: Activator,
157158
pub task_blueprint_loader: Activator,
158159
pub task_blueprint_executor: Activator,
160+
pub task_blueprint_rendezvous: Activator,
159161
pub task_crdb_node_id_collector: Activator,
160162
pub task_service_zone_nat_tracker: Activator,
161163
pub task_switch_port_settings_manager: Activator,
@@ -243,6 +245,7 @@ impl BackgroundTasksInitializer {
243245
task_phantom_disks: Activator::new(),
244246
task_blueprint_loader: Activator::new(),
245247
task_blueprint_executor: Activator::new(),
248+
task_blueprint_rendezvous: Activator::new(),
246249
task_crdb_node_id_collector: Activator::new(),
247250
task_service_zone_nat_tracker: Activator::new(),
248251
task_switch_port_settings_manager: Activator::new(),
@@ -311,6 +314,7 @@ impl BackgroundTasksInitializer {
311314
task_phantom_disks,
312315
task_blueprint_loader,
313316
task_blueprint_executor,
317+
task_blueprint_rendezvous,
314318
task_crdb_node_id_collector,
315319
task_service_zone_nat_tracker,
316320
task_switch_port_settings_manager,
@@ -491,19 +495,15 @@ impl BackgroundTasksInitializer {
491495
period: config.blueprints.period_secs_collect_crdb_node_ids,
492496
task_impl: Box::new(crdb_node_id_collector),
493497
opctx: opctx.child(BTreeMap::new()),
494-
watchers: vec![Box::new(rx_blueprint)],
498+
watchers: vec![Box::new(rx_blueprint.clone())],
495499
activator: task_crdb_node_id_collector,
496500
});
497501

498502
// Background task: inventory collector
499503
//
500-
// This currently depends on the "output" of the blueprint executor in
504+
// This depends on the "output" of the blueprint executor in
501505
// order to automatically trigger inventory collection whenever the
502-
// blueprint executor runs. In the limit, this could become a problem
503-
// because the blueprint executor might also depend indirectly on the
504-
// inventory collector. In that case, we could expose `Activator`s to
505-
// one or both of these tasks to directly activate the other precisely
506-
// when needed. But for now, this works.
506+
// blueprint executor runs.
507507
let inventory_watcher = {
508508
let collector = inventory_collection::InventoryCollector::new(
509509
datastore.clone(),
@@ -563,10 +563,28 @@ impl BackgroundTasksInitializer {
563563
),
564564
),
565565
opctx: opctx.child(BTreeMap::new()),
566-
watchers: vec![Box::new(inventory_watcher)],
566+
watchers: vec![Box::new(inventory_watcher.clone())],
567567
activator: task_physical_disk_adoption,
568568
});
569569

570+
driver.register(TaskDefinition {
571+
name: "blueprint_rendezvous",
572+
description:
573+
"reconciles blueprints and inventory collection, updating \
574+
Reconfigurator-owned rendezvous tables that other subsystems \
575+
consume",
576+
period: config.blueprints.period_secs_rendezvous,
577+
task_impl: Box::new(
578+
blueprint_rendezvous::BlueprintRendezvous::new(
579+
datastore.clone(),
580+
rx_blueprint.clone(),
581+
),
582+
),
583+
opctx: opctx.child(BTreeMap::new()),
584+
watchers: vec![Box::new(inventory_watcher.clone())],
585+
activator: task_blueprint_rendezvous,
586+
});
587+
570588
driver.register(TaskDefinition {
571589
name: "decommissioned_disk_cleaner",
572590
description:

0 commit comments

Comments
 (0)