Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions nexus/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ dropshot.workspace = true
fatfs.workspace = true
futures.workspace = true
gateway-client.workspace = true
gateway-types.workspace = true
headers.workspace = true
hex.workspace = true
hickory-resolver.workspace = true
Expand Down
155 changes: 131 additions & 24 deletions nexus/src/app/background/tasks/support_bundle_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ use futures::stream::FuturesUnordered;
use gateway_client::Client as MgsClient;
use gateway_client::types::SpIdentifier;
use gateway_client::types::SpIgnition;
use gateway_types::component::SpType;
use internal_dns_resolver::Resolver;
use internal_dns_types::names::ServiceName;
use nexus_db_model::Ereport;
use nexus_db_model::Sled;
use nexus_db_model::SupportBundle;
use nexus_db_model::SupportBundleState;
use nexus_db_queries::authz;
Expand All @@ -47,9 +49,11 @@ use omicron_uuid_kinds::SledUuid;
use omicron_uuid_kinds::SupportBundleUuid;
use omicron_uuid_kinds::ZpoolUuid;
use parallel_task_set::ParallelTaskSet;
use serde::Serialize;
use serde_json::json;
use sha2::{Digest, Sha256};
use slog_error_chain::InlineErrorChain;
use std::collections::BTreeMap;
use std::future::Future;
use std::io::Write;
use std::num::NonZeroU64;
Expand All @@ -61,6 +65,7 @@ use tokio::io::AsyncWriteExt;
use tokio::io::SeekFrom;
use tokio_util::task::AbortOnDropHandle;
use tufaceous_artifact::ArtifactHash;
use uuid::Uuid;
use zip::ZipArchive;
use zip::ZipWriter;
use zip::write::FullFileOptions;
Expand Down Expand Up @@ -707,23 +712,44 @@ impl BundleCollection {
None
};

let sp_dumps_dir = dir.path().join("sp_task_dumps");
tokio::fs::create_dir_all(&sp_dumps_dir).await.with_context(|| {
format!("failed to create SP task dump directory {sp_dumps_dir}")
})?;
if let Err(e) =
save_all_sp_dumps(log, &self.resolver, &sp_dumps_dir).await
{
error!(log, "failed to capture SP task dumps"; "error" => InlineErrorChain::new(e.as_ref()));
} else {
report.listed_sps = true;
};

if let Ok(all_sleds) = self
let all_sleds = self
.datastore
.sled_list_all_batched(&self.opctx, SledFilter::InService)
.await;

if let Ok(mgs_client) = self.create_mgs_client().await {
if let Err(e) = write_sled_info(
&self.log,
&mgs_client,
all_sleds.as_deref().ok(),
dir.path(),
)
.await
{
{
error!(log, "Failed to write sled_info.json"; "error" => InlineErrorChain::new(e.as_ref()));
}

let sp_dumps_dir = dir.path().join("sp_task_dumps");
tokio::fs::create_dir_all(&sp_dumps_dir).await.with_context(
|| {
format!(
"Failed to create SP task dump directory {sp_dumps_dir}"
)
},
)?;

if let Err(e) =
save_all_sp_dumps(log, &mgs_client, &sp_dumps_dir).await
{
error!(log, "Failed to capture SP task dumps"; "error" => InlineErrorChain::new(e.as_ref()));
} else {
report.listed_sps = true;
};
} else {
warn!(log, "No MGS client, skipping SP task dump collection");
}

if let Ok(all_sleds) = all_sleds {
report.listed_in_service_sleds = true;

const MAX_CONCURRENT_SLED_REQUESTS: usize = 16;
Expand Down Expand Up @@ -1031,6 +1057,20 @@ impl BundleCollection {
);
Ok(())
}

async fn create_mgs_client(&self) -> anyhow::Result<MgsClient> {
self
.resolver
.lookup_socket_v6(ServiceName::ManagementGatewayService)
.await
.map(|sockaddr| {
let url = format!("http://{}", sockaddr);
gateway_client::Client::new(&url, self.log.clone())
}).map_err(|e| {
error!(self.log, "failed to resolve MGS address"; "error" => InlineErrorChain::new(&e));
e.into()
})
}
}

impl BackgroundTask for SupportBundleCollector {
Expand Down Expand Up @@ -1316,18 +1356,9 @@ where
/// Collect task dumps from all SPs via MGS and save them to a directory.
async fn save_all_sp_dumps(
log: &slog::Logger,
resolver: &Resolver,
mgs_client: &MgsClient,
sp_dumps_dir: &Utf8Path,
) -> anyhow::Result<()> {
let mgs_client = resolver
.lookup_socket_v6(ServiceName::ManagementGatewayService)
.await
.map(|sockaddr| {
let url = format!("http://{}", sockaddr);
gateway_client::Client::new(&url, log.clone())
})
.context("failed to resolve address of MGS")?;

let available_sps = get_available_sps(&mgs_client).await?;

let mut tasks = ParallelTaskSet::new();
Expand Down Expand Up @@ -1412,6 +1443,82 @@ async fn save_sp_dumps(
Ok(())
}

/// Write a file with a JSON mapping of sled serial numbers to cubby and UUIDs for easier
/// identification of sleds present in a bundle.
async fn write_sled_info(
log: &slog::Logger,
mgs_client: &MgsClient,
nexus_sleds: Option<&[Sled]>,
dir: &Utf8Path,
) -> anyhow::Result<()> {
#[derive(Serialize)]
struct SledInfo {
cubby: Option<u16>,
uuid: Option<Uuid>,
}

let available_sps = get_available_sps(&mgs_client)
.await
.context("failed to get available SPs")?;

// We can still get a useful mapping of cubby to serial using just the data from MGS.
let mut nexus_map: BTreeMap<_, _> = nexus_sleds
.unwrap_or_default()
.into_iter()
.map(|sled| (sled.serial_number(), sled))
.collect();

let mut sled_info = BTreeMap::new();
for sp in
available_sps.into_iter().filter(|sp| matches!(sp.type_, SpType::Sled))
{
let sp_state = match mgs_client.sp_get(&sp.type_, sp.slot).await {
Ok(s) => s.into_inner(),
Err(e) => {
error!(log,
"Failed to get SP state for sled_info.json";
"cubby" => sp.slot,
"component" => %sp.type_,
"error" => InlineErrorChain::new(&e)
);
continue;
}
};

if let Some(sled) = nexus_map.remove(sp_state.serial_number.as_str()) {
sled_info.insert(
sp_state.serial_number.to_string(),
SledInfo {
cubby: Some(sp.slot),
uuid: Some(*sled.identity.id.as_untyped_uuid()),
},
);
} else {
sled_info.insert(
sp_state.serial_number.to_string(),
SledInfo { cubby: Some(sp.slot), uuid: None },
);
}
}

// Sleds not returned by MGS.
for (serial, sled) in nexus_map {
sled_info.insert(
serial.to_string(),
SledInfo {
cubby: None,
uuid: Some(*sled.identity.id.as_untyped_uuid()),
},
);
}

let json = serde_json::to_string_pretty(&sled_info)
.context("failed to serialize sled info to JSON")?;
tokio::fs::write(dir.join("sled_info.json"), json).await?;

Ok(())
}

fn is_fs_safe_single_path_component(s: &str) -> bool {
// Might be path traversal...
if s == "." || s == ".." {
Expand Down
Loading