dev-tools/omdb/src/bin/omdb/nexus.rs (+72, -0)
@@ -54,6 +54,9 @@ use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus;
use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus;
use nexus_types::internal_api::background::SupportBundleCleanupReport;
use nexus_types::internal_api::background::SupportBundleCollectionReport;
use nexus_types::internal_api::background::TufArtifactReplicationCounters;
use nexus_types::internal_api::background::TufArtifactReplicationRequest;
use nexus_types::internal_api::background::TufArtifactReplicationStatus;
use nexus_types::inventory::BaseboardId;
use omicron_uuid_kinds::BlueprintUuid;
use omicron_uuid_kinds::CollectionUuid;
@@ -952,6 +955,9 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
"support_bundle_collector" => {
print_task_support_bundle_collector(details);
}
"tuf_artifact_replication" => {
print_task_tuf_artifact_replication(details);
}
_ => {
println!(
"warning: unknown background task: {:?} \
@@ -2122,6 +2128,72 @@ fn print_task_support_bundle_collector(details: &serde_json::Value) {
    }
}

fn print_task_tuf_artifact_replication(details: &serde_json::Value) {
    fn print_counters(counters: TufArtifactReplicationCounters) {
        const ROWS: &[&str] = &[
            "list ok:",
            "list err:",
            "put ok:",
            "put err:",
            "copy ok:",
            "copy err:",
            "delete ok:",
            "delete err:",
        ];
        const WIDTH: usize = const_max_len(ROWS);

        for (label, value) in ROWS.iter().zip([
            counters.list_ok,
            counters.list_err,
            counters.put_ok,
            counters.put_err,
            counters.copy_ok,
            counters.copy_err,
            counters.delete_ok,
            counters.delete_err,
        ]) {
            println!(" {label:<WIDTH$} {value:>3}");
        }
    }

    match serde_json::from_value::<TufArtifactReplicationStatus>(
        details.clone(),
    ) {
        Err(error) => eprintln!(
            "warning: failed to interpret task details: {:?}: {:?}",
            error, details
        ),
        Ok(status) => {
            println!(" request ringbuf:");
            if status.request_debug_ringbuf.is_empty() {
                println!(" [no entries]");
            }
            for TufArtifactReplicationRequest {
                time,
                target_sled,
                operation,
                error,
            } in status.request_debug_ringbuf.iter()
            {
                println!(" - target sled: {target_sled}");
                println!(" operation: {operation:?}");
                println!(
                    " at: {}",
                    time.to_rfc3339_opts(SecondsFormat::Secs, true)
                );
                if let Some(error) = error {
                    println!(" error: {error}")
                }
            }
            println!(" last run:");
            print_counters(status.last_run_counters);
            println!(" lifetime:");
            print_counters(status.lifetime_counters);
            println!(" local repos: {}", status.local_repos);
        }
    }
}

/// Summarizes an `ActivationReason`
fn reason_str(reason: &ActivationReason) -> &'static str {
    match reason {
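The `const_max_len` helper used to compute `WIDTH` above is defined elsewhere in nexus.rs and is not part of this diff. A minimal sketch of what such a compile-time helper can look like, assuming the signature `const fn const_max_len(strs: &[&str]) -> usize` (illustrative, not the PR's actual implementation):

    // Walk the labels at compile time and return the longest length,
    // so `{label:<WIDTH$}` pads every counter row to the same column.
    const fn const_max_len(strs: &[&str]) -> usize {
        let mut max = 0;
        let mut i = 0;
        while i < strs.len() {
            if strs[i].len() > max {
                max = strs[i].len();
            }
            i += 1;
        }
        max
    }

Because `WIDTH` is a `const`, it can be used directly as the width parameter inside the `println!` format string.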
dev-tools/omdb/tests/env.out (+12, -0)
@@ -175,6 +175,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "tuf_artifact_replication"
replicate update repo artifacts across sleds


task: "v2p_manager"
manages opte v2p mappings for vpc networking

@@ -355,6 +359,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "tuf_artifact_replication"
replicate update repo artifacts across sleds


task: "v2p_manager"
manages opte v2p mappings for vpc networking

@@ -522,6 +530,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "tuf_artifact_replication"
replicate update repo artifacts across sleds


task: "v2p_manager"
manages opte v2p mappings for vpc networking

dev-tools/omdb/tests/successes.out (+58, -0)
@@ -394,6 +394,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "tuf_artifact_replication"
replicate update repo artifacts across sleds


task: "v2p_manager"
manages opte v2p mappings for vpc networking

@@ -729,6 +733,33 @@ task: "switch_port_config_manager"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {})

task: "tuf_artifact_replication"
configured period: every <REDACTED_DURATION>h
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
request ringbuf:
<REDACTED_SECTION>
last run:
list ok: <LIST_OK_REDACTED>
list err: 0
put ok: 0
put err: 0
copy ok: 0
copy err: 0
delete ok: 0
delete err: 0
lifetime:
list ok: <LIST_OK_REDACTED>
list err: 0
put ok: 0
put err: 0
copy ok: 0
copy err: 0
delete ok: 0
delete err: 0
local repos: 0

task: "v2p_manager"
configured period: every <REDACTED_DURATION>s
currently executing: no
@@ -1193,6 +1224,33 @@ task: "switch_port_config_manager"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {})

task: "tuf_artifact_replication"
configured period: every <REDACTED_DURATION>h
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
request ringbuf:
<REDACTED_SECTION>
last run:
list ok: <LIST_OK_REDACTED>
list err: 0
put ok: 0
put err: 0
copy ok: 0
copy err: 0
delete ok: 0
delete err: 0
lifetime:
list ok: <LIST_OK_REDACTED>
list err: 0
put ok: 0
put err: 0
copy ok: 0
copy err: 0
delete ok: 0
delete err: 0
local repos: 0

task: "v2p_manager"
configured period: every <REDACTED_DURATION>s
currently executing: no
dev-tools/omdb/tests/test_all_output.rs (+8, -0)
@@ -219,6 +219,14 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) {
        redactor.extra_variable_length("cockroachdb_version", &crdb_version);
    }

    // The `tuf_artifact_replication` task's output depends on how
    // many sleds happened to register with Nexus before its first
    // execution. These redactions work around the issue described in
    // https://github.com/oxidecomputer/omicron/issues/7417.
    redactor
        .field("list ok:", r"\d+")
        .section(&["task: \"tuf_artifact_replication\"", "request ringbuf:"]);

    for args in invocations {
        println!("running commands with args: {:?}", args);
        let p = postgres_url.to_string();
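Judging by the expected-output files above, the `.field(...)` call appears to replace the digits after the `list ok:` label with `<LIST_OK_REDACTED>`, and `.section(...)` appears to collapse the lines between the two given markers into `<REDACTED_SECTION>`. A rough stand-in for the field substitution, assuming a plain regex implementation (the real `Redactor` lives in omicron's test helpers and may differ):

    use regex::Regex;

    // Hypothetical equivalent of `redactor.field("list ok:", r"\d+")`:
    // swap the variable count for a stable token so the expected output
    // does not depend on how many sleds registered before the first run.
    fn redact_list_ok(output: &str) -> String {
        let re = Regex::new(r"(list ok:\s*)\d+").unwrap();
        re.replace_all(output, "${1}<LIST_OK_REDACTED>").into_owned()
    }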
dev-tools/omdb/tests/usage_errors.out (+12, -10)
@@ -315,17 +315,19 @@ Options:
Show sleds that match the given filter

Possible values:
-  - all: All sleds in the system, regardless of policy or state
-  - commissioned: All sleds that are currently part of the control plane cluster
-  - decommissioned: All sleds that were previously part of the control plane cluster
-    but have been decommissioned
-  - discretionary: Sleds that are eligible for discretionary services
-  - in-service: Sleds that are in service (even if they might not be eligible
+  - all: All sleds in the system, regardless of policy or state
+  - commissioned: All sleds that are currently part of the control plane cluster
+  - decommissioned: All sleds that were previously part of the control plane
+    cluster but have been decommissioned
+  - discretionary: Sleds that are eligible for discretionary services
+  - in-service: Sleds that are in service (even if they might not be eligible
     for discretionary services)
-  - query-during-inventory: Sleds whose sled agents should be queried for inventory
-  - reservation-create: Sleds on which reservations can be created
-  - vpc-routing: Sleds which should be sent OPTE V2P mappings and Routing rules
-  - vpc-firewall: Sleds which should be sent VPC firewall rules
+  - query-during-inventory: Sleds whose sled agents should be queried for inventory
+  - reservation-create: Sleds on which reservations can be created
+  - vpc-routing: Sleds which should be sent OPTE V2P mappings and Routing rules
+  - vpc-firewall: Sleds which should be sent VPC firewall rules
+  - tuf-artifact-replication: Sleds which should have TUF repo artifacts replicated onto
+    them

--log-level <LOG_LEVEL>
log level filter
nexus-config/src/nexus_config.rs (+22, -0)
@@ -417,6 +417,8 @@ pub struct BackgroundTaskConfig {
    /// configuration for region snapshot replacement finisher task
    pub region_snapshot_replacement_finish:
        RegionSnapshotReplacementFinishConfig,
    /// configuration for TUF artifact replication task
    pub tuf_artifact_replication: TufArtifactReplicationConfig,
}

#[serde_as]
@@ -722,6 +724,17 @@ pub struct RegionSnapshotReplacementFinishConfig {
    pub period_secs: Duration,
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct TufArtifactReplicationConfig {
    /// period (in seconds) for periodic activations of this background task
    #[serde_as(as = "DurationSeconds<u64>")]
    pub period_secs: Duration,
    /// The number of sleds that artifacts must be present on before a local
    /// copy of a repo's artifacts is dropped.
    pub min_sled_replication: usize,
}
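// For reference: with `serde_as(as = "DurationSeconds<u64>")`, the period is
// written as a bare integer of seconds in the Nexus config TOML. A sketch of
// the assumed operator-facing syntax, mirroring the test config further down:
//
//     tuf_artifact_replication.period_secs = 300
//     tuf_artifact_replication.min_sled_replication = 3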

/// Configuration for a nexus server
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub struct PackageConfig {
@@ -978,6 +991,8 @@ mod test {
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
region_snapshot_replacement_finish.period_secs = 30
tuf_artifact_replication.period_secs = 300
tuf_artifact_replication.min_sled_replication = 3
[default_region_allocation_strategy]
type = "random"
seed = 0
@@ -1174,6 +1189,11 @@
RegionSnapshotReplacementFinishConfig {
period_secs: Duration::from_secs(30),
},
tuf_artifact_replication:
TufArtifactReplicationConfig {
period_secs: Duration::from_secs(300),
min_sled_replication: 3,
},
},
default_region_allocation_strategy:
crate::nexus_config::RegionAllocationStrategy::Random {
@@ -1257,6 +1277,8 @@
region_snapshot_replacement_garbage_collection.period_secs = 30
region_snapshot_replacement_step.period_secs = 30
region_snapshot_replacement_finish.period_secs = 30
tuf_artifact_replication.period_secs = 300
tuf_artifact_replication.min_sled_replication = 3
[default_region_allocation_strategy]
type = "random"
"##,
nexus/db-model/src/schema.rs (+1, -0)
@@ -907,6 +907,7 @@ table! {
sled_policy -> crate::sled_policy::SledPolicyEnum,
sled_state -> crate::SledStateEnum,
sled_agent_gen -> Int8,
repo_depot_port -> Int4,
}
}

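The new column is declared `Int4` while the model field (below, in sled.rs) is `SqlU16`: a port is a `u16`, and CockroachDB has no unsigned 16-bit column type, so the value is widened in the database, matching how the existing sled-agent `port` column is handled. A small illustration of the assumed lossless round-trip:

    fn main() {
        // A u16 port widens losslessly into the i32 ("Int4") column type
        // and converts back when read out of the database.
        let port: u16 = 12348;
        let stored: i32 = i32::from(port);
        let back: u16 = u16::try_from(stored).expect("fits in u16");
        assert_eq!(port, back);
    }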
nexus/db-model/src/schema_versions.rs (+2, -1)
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(121, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(122, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(122, "tuf-artifact-replication"),
KnownVersion::new(121, "dataset-to-crucible-dataset"),
KnownVersion::new(120, "rendezvous-debug-dataset"),
KnownVersion::new(119, "tuf-artifact-key-uuid"),
nexus/db-model/src/sled.rs (+10, -0)
@@ -81,6 +81,9 @@ pub struct Sled {
/// This is specifically distinct from `rcgen`, which is incremented by
/// child resources as part of `DatastoreCollectionConfig`.
pub sled_agent_gen: Generation,

// ServiceAddress (Repo Depot API). Uses `ip`.
pub repo_depot_port: SqlU16,
}

impl Sled {
Expand Down Expand Up @@ -169,6 +172,7 @@ impl From<Sled> for params::SledAgentInfo {
};
Self {
sa_address: sled.address(),
repo_depot_port: sled.repo_depot_port.into(),
role,
baseboard: Baseboard {
serial: sled.serial_number.clone(),
@@ -220,6 +224,9 @@ pub struct SledUpdate {
pub ip: ipv6::Ipv6Addr,
pub port: SqlU16,

// ServiceAddress (Repo Depot API). Uses `ip`.
pub repo_depot_port: SqlU16,

// Generation number - owned and incremented by sled-agent.
pub sled_agent_gen: Generation,
}
@@ -228,6 +235,7 @@ impl SledUpdate {
pub fn new(
id: Uuid,
addr: SocketAddrV6,
repo_depot_port: u16,
baseboard: SledBaseboard,
hardware: SledSystemHardware,
rack_id: Uuid,
@@ -247,6 +255,7 @@
reservoir_size: hardware.reservoir_size,
ip: addr.ip().into(),
port: addr.port().into(),
repo_depot_port: repo_depot_port.into(),
sled_agent_gen,
}
}
@@ -282,6 +291,7 @@ impl SledUpdate {
reservoir_size: self.reservoir_size,
ip: self.ip,
port: self.port,
repo_depot_port: self.repo_depot_port,
last_used_address,
sled_agent_gen: self.sled_agent_gen,
}
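Both `// ServiceAddress (Repo Depot API). Uses `ip`.` comments point at the same pattern: the repo depot shares the sled's IP and differs only in port. A hypothetical helper showing how a client might assemble that address (not part of this diff):

    use std::net::{Ipv6Addr, SocketAddrV6};

    // The repo depot listens on the sled's IP at `repo_depot_port`,
    // alongside the sled-agent API on `port`.
    fn repo_depot_addr(ip: Ipv6Addr, repo_depot_port: u16) -> SocketAddrV6 {
        SocketAddrV6::new(ip, repo_depot_port, 0, 0)
    }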
nexus/db-queries/src/db/datastore/crucible_dataset.rs (+1, -0)
@@ -270,6 +270,7 @@ mod test {
let sled = SledUpdate::new(
*sled_id.as_untyped_uuid(),
"[::1]:0".parse().unwrap(),
0,
SledBaseboard {
serial_number: "test-sn".to_string(),
part_number: "test-pn".to_string(),