@@ -20,9 +20,11 @@ use futures::stream::FuturesUnordered;
20
20
use gateway_client:: Client as MgsClient ;
21
21
use gateway_client:: types:: SpIdentifier ;
22
22
use gateway_client:: types:: SpIgnition ;
23
+ use gateway_types:: component:: SpType ;
23
24
use internal_dns_resolver:: Resolver ;
24
25
use internal_dns_types:: names:: ServiceName ;
25
26
use nexus_db_model:: Ereport ;
27
+ use nexus_db_model:: Sled ;
26
28
use nexus_db_model:: SupportBundle ;
27
29
use nexus_db_model:: SupportBundleState ;
28
30
use nexus_db_queries:: authz;
@@ -47,9 +49,11 @@ use omicron_uuid_kinds::SledUuid;
47
49
use omicron_uuid_kinds:: SupportBundleUuid ;
48
50
use omicron_uuid_kinds:: ZpoolUuid ;
49
51
use parallel_task_set:: ParallelTaskSet ;
52
+ use serde:: Serialize ;
50
53
use serde_json:: json;
51
54
use sha2:: { Digest , Sha256 } ;
52
55
use slog_error_chain:: InlineErrorChain ;
56
+ use std:: collections:: BTreeMap ;
53
57
use std:: future:: Future ;
54
58
use std:: io:: Write ;
55
59
use std:: num:: NonZeroU64 ;
@@ -61,6 +65,7 @@ use tokio::io::AsyncWriteExt;
61
65
use tokio:: io:: SeekFrom ;
62
66
use tokio_util:: task:: AbortOnDropHandle ;
63
67
use tufaceous_artifact:: ArtifactHash ;
68
+ use uuid:: Uuid ;
64
69
use zip:: ZipArchive ;
65
70
use zip:: ZipWriter ;
66
71
use zip:: write:: FullFileOptions ;
@@ -707,23 +712,44 @@ impl BundleCollection {
707
712
None
708
713
} ;
709
714
710
- let sp_dumps_dir = dir. path ( ) . join ( "sp_task_dumps" ) ;
711
- tokio:: fs:: create_dir_all ( & sp_dumps_dir) . await . with_context ( || {
712
- format ! ( "failed to create SP task dump directory {sp_dumps_dir}" )
713
- } ) ?;
714
- if let Err ( e) =
715
- save_all_sp_dumps ( log, & self . resolver , & sp_dumps_dir) . await
716
- {
717
- error ! ( log, "failed to capture SP task dumps" ; "error" => InlineErrorChain :: new( e. as_ref( ) ) ) ;
718
- } else {
719
- report. listed_sps = true ;
720
- } ;
721
-
722
- if let Ok ( all_sleds) = self
715
+ let all_sleds = self
723
716
. datastore
724
717
. sled_list_all_batched ( & self . opctx , SledFilter :: InService )
718
+ . await ;
719
+
720
+ if let Ok ( mgs_client) = self . create_mgs_client ( ) . await {
721
+ if let Err ( e) = write_sled_info (
722
+ & self . log ,
723
+ & mgs_client,
724
+ all_sleds. as_deref ( ) . ok ( ) ,
725
+ dir. path ( ) ,
726
+ )
725
727
. await
726
- {
728
+ {
729
+ error ! ( log, "Failed to write sled_info.json" ; "error" => InlineErrorChain :: new( e. as_ref( ) ) ) ;
730
+ }
731
+
732
+ let sp_dumps_dir = dir. path ( ) . join ( "sp_task_dumps" ) ;
733
+ tokio:: fs:: create_dir_all ( & sp_dumps_dir) . await . with_context (
734
+ || {
735
+ format ! (
736
+ "Failed to create SP task dump directory {sp_dumps_dir}"
737
+ )
738
+ } ,
739
+ ) ?;
740
+
741
+ if let Err ( e) =
742
+ save_all_sp_dumps ( log, & mgs_client, & sp_dumps_dir) . await
743
+ {
744
+ error ! ( log, "Failed to capture SP task dumps" ; "error" => InlineErrorChain :: new( e. as_ref( ) ) ) ;
745
+ } else {
746
+ report. listed_sps = true ;
747
+ } ;
748
+ } else {
749
+ warn ! ( log, "No MGS client, skipping SP task dump collection" ) ;
750
+ }
751
+
752
+ if let Ok ( all_sleds) = all_sleds {
727
753
report. listed_in_service_sleds = true ;
728
754
729
755
const MAX_CONCURRENT_SLED_REQUESTS : usize = 16 ;
@@ -1031,6 +1057,20 @@ impl BundleCollection {
1031
1057
) ;
1032
1058
Ok ( ( ) )
1033
1059
}
1060
+
1061
+ async fn create_mgs_client ( & self ) -> anyhow:: Result < MgsClient > {
1062
+ self
1063
+ . resolver
1064
+ . lookup_socket_v6 ( ServiceName :: ManagementGatewayService )
1065
+ . await
1066
+ . map ( |sockaddr| {
1067
+ let url = format ! ( "http://{}" , sockaddr) ;
1068
+ gateway_client:: Client :: new ( & url, self . log . clone ( ) )
1069
+ } ) . map_err ( |e| {
1070
+ error ! ( self . log, "failed to resolve MGS address" ; "error" => InlineErrorChain :: new( & e) ) ;
1071
+ e. into ( )
1072
+ } )
1073
+ }
1034
1074
}
1035
1075
1036
1076
impl BackgroundTask for SupportBundleCollector {
@@ -1316,18 +1356,9 @@ where
1316
1356
/// Collect task dumps from all SPs via MGS and save them to a directory.
1317
1357
async fn save_all_sp_dumps (
1318
1358
log : & slog:: Logger ,
1319
- resolver : & Resolver ,
1359
+ mgs_client : & MgsClient ,
1320
1360
sp_dumps_dir : & Utf8Path ,
1321
1361
) -> anyhow:: Result < ( ) > {
1322
- let mgs_client = resolver
1323
- . lookup_socket_v6 ( ServiceName :: ManagementGatewayService )
1324
- . await
1325
- . map ( |sockaddr| {
1326
- let url = format ! ( "http://{}" , sockaddr) ;
1327
- gateway_client:: Client :: new ( & url, log. clone ( ) )
1328
- } )
1329
- . context ( "failed to resolve address of MGS" ) ?;
1330
-
1331
1362
let available_sps = get_available_sps ( & mgs_client) . await ?;
1332
1363
1333
1364
let mut tasks = ParallelTaskSet :: new ( ) ;
@@ -1412,6 +1443,82 @@ async fn save_sp_dumps(
1412
1443
Ok ( ( ) )
1413
1444
}
1414
1445
1446
+ /// Write a file with a JSON mapping of sled serial numbers to cubby and UUIDs for easier
1447
+ /// identification of sleds present in a bundle.
1448
+ async fn write_sled_info (
1449
+ log : & slog:: Logger ,
1450
+ mgs_client : & MgsClient ,
1451
+ nexus_sleds : Option < & [ Sled ] > ,
1452
+ dir : & Utf8Path ,
1453
+ ) -> anyhow:: Result < ( ) > {
1454
+ #[ derive( Serialize ) ]
1455
+ struct SledInfo {
1456
+ cubby : Option < u16 > ,
1457
+ uuid : Option < Uuid > ,
1458
+ }
1459
+
1460
+ let available_sps = get_available_sps ( & mgs_client)
1461
+ . await
1462
+ . context ( "failed to get available SPs" ) ?;
1463
+
1464
+ // We can still get a useful mapping of cubby to serial using just the data from MGS.
1465
+ let mut nexus_map: BTreeMap < _ , _ > = nexus_sleds
1466
+ . unwrap_or_default ( )
1467
+ . into_iter ( )
1468
+ . map ( |sled| ( sled. serial_number ( ) , sled) )
1469
+ . collect ( ) ;
1470
+
1471
+ let mut sled_info = BTreeMap :: new ( ) ;
1472
+ for sp in
1473
+ available_sps. into_iter ( ) . filter ( |sp| matches ! ( sp. type_, SpType :: Sled ) )
1474
+ {
1475
+ let sp_state = match mgs_client. sp_get ( & sp. type_ , sp. slot ) . await {
1476
+ Ok ( s) => s. into_inner ( ) ,
1477
+ Err ( e) => {
1478
+ error ! ( log,
1479
+ "Failed to get SP state for sled_info.json" ;
1480
+ "cubby" => sp. slot,
1481
+ "component" => %sp. type_,
1482
+ "error" => InlineErrorChain :: new( & e)
1483
+ ) ;
1484
+ continue ;
1485
+ }
1486
+ } ;
1487
+
1488
+ if let Some ( sled) = nexus_map. remove ( sp_state. serial_number . as_str ( ) ) {
1489
+ sled_info. insert (
1490
+ sp_state. serial_number . to_string ( ) ,
1491
+ SledInfo {
1492
+ cubby : Some ( sp. slot ) ,
1493
+ uuid : Some ( * sled. identity . id . as_untyped_uuid ( ) ) ,
1494
+ } ,
1495
+ ) ;
1496
+ } else {
1497
+ sled_info. insert (
1498
+ sp_state. serial_number . to_string ( ) ,
1499
+ SledInfo { cubby : Some ( sp. slot ) , uuid : None } ,
1500
+ ) ;
1501
+ }
1502
+ }
1503
+
1504
+ // Sleds not returned by MGS.
1505
+ for ( serial, sled) in nexus_map {
1506
+ sled_info. insert (
1507
+ serial. to_string ( ) ,
1508
+ SledInfo {
1509
+ cubby : None ,
1510
+ uuid : Some ( * sled. identity . id . as_untyped_uuid ( ) ) ,
1511
+ } ,
1512
+ ) ;
1513
+ }
1514
+
1515
+ let json = serde_json:: to_string_pretty ( & sled_info)
1516
+ . context ( "failed to serialize sled info to JSON" ) ?;
1517
+ tokio:: fs:: write ( dir. join ( "sled_info.json" ) , json) . await ?;
1518
+
1519
+ Ok ( ( ) )
1520
+ }
1521
+
1415
1522
fn is_fs_safe_single_path_component ( s : & str ) -> bool {
1416
1523
// Might be path traversal...
1417
1524
if s == "." || s == ".." {
0 commit comments