@@ -8,21 +8,22 @@ use anyhow::Context;
88use anyhow:: bail;
99use cockroach_admin_client:: types:: NodeDecommission ;
1010use cockroach_admin_client:: types:: NodeId ;
11+ use futures:: Stream ;
1112use futures:: StreamExt ;
13+ use futures:: future:: Either ;
1214use futures:: stream;
1315use internal_dns_resolver:: Resolver ;
1416use internal_dns_types:: names:: ServiceName ;
1517use nexus_db_queries:: context:: OpContext ;
1618use nexus_db_queries:: db:: DataStore ;
1719use nexus_db_queries:: db:: datastore:: CollectorReassignment ;
1820use nexus_types:: deployment:: Blueprint ;
19- use nexus_types:: deployment:: BlueprintZoneConfig ;
2021use nexus_types:: deployment:: BlueprintZoneDisposition ;
21- use nexus_types:: deployment:: BlueprintZoneType ;
2222use omicron_common:: address:: COCKROACH_ADMIN_PORT ;
2323use omicron_uuid_kinds:: GenericUuid ;
2424use omicron_uuid_kinds:: OmicronZoneUuid ;
2525use omicron_uuid_kinds:: SledUuid ;
26+ use sled_agent_types:: inventory:: ZoneKind ;
2627use slog:: Logger ;
2728use slog:: info;
2829use slog:: warn;
@@ -45,96 +46,116 @@ pub(crate) async fn clean_up_expunged_zones<R: CleanupResolver>(
4546 // cockroach-admin knows how to gate decommissioning correctly.
4647 let decommission_cockroach = false ;
4748
48- clean_up_expunged_zones_impl (
49- opctx,
50- datastore,
51- resolver,
52- decommission_cockroach,
53- blueprint
54- . all_omicron_zones ( BlueprintZoneDisposition :: is_ready_for_cleanup) ,
55- )
56- . await
57- }
49+ let cockroach_cleanups = if decommission_cockroach {
50+ Either :: Left ( clean_up_expunged_cockroach_zones (
51+ opctx, datastore, resolver, blueprint,
52+ ) )
53+ } else {
54+ Either :: Right ( stream:: empty ( ) )
55+ } ;
5856
59- async fn clean_up_expunged_zones_impl < R : CleanupResolver > (
60- opctx : & OpContext ,
61- datastore : & DataStore ,
62- resolver : & R ,
63- decommission_cockroach : bool ,
64- zones_to_clean_up : impl Iterator < Item = ( SledUuid , & BlueprintZoneConfig ) > ,
65- ) -> Result < ( ) , Vec < anyhow:: Error > > {
66- let errors: Vec < anyhow:: Error > = stream:: iter ( zones_to_clean_up)
67- . filter_map ( async |( sled_id, config) | {
68- let log = opctx. log . new ( slog:: o!(
69- "sled_id" => sled_id. to_string( ) ,
70- "zone_id" => config. id. to_string( ) ,
71- "zone_type" => config. zone_type. kind( ) . report_str( ) ,
72- ) ) ;
73-
74- let result = match & config. zone_type {
75- // Zones which need cleanup after expungement.
76- BlueprintZoneType :: Nexus ( _) => Some (
77- datastore
78- . database_nexus_access_delete ( & opctx, config. id )
79- . await
80- . map_err ( |err| anyhow:: anyhow!( err) ) ,
81- ) ,
82- BlueprintZoneType :: CockroachDb ( _) => {
83- if decommission_cockroach {
84- Some (
85- decommission_cockroachdb_node (
86- opctx, datastore, resolver, config. id , & log,
87- )
88- . await ,
89- )
90- } else {
91- None
92- }
93- }
94- BlueprintZoneType :: Oximeter ( _) => Some (
95- oximeter_cleanup ( opctx, datastore, config. id , & log) . await ,
96- ) ,
97-
98- // Zones that may or may not need cleanup work - we haven't
99- // gotten to these yet!
100- BlueprintZoneType :: BoundaryNtp ( _)
101- | BlueprintZoneType :: Clickhouse ( _)
102- | BlueprintZoneType :: ClickhouseKeeper ( _)
103- | BlueprintZoneType :: ClickhouseServer ( _)
104- | BlueprintZoneType :: Crucible ( _)
105- | BlueprintZoneType :: CruciblePantry ( _)
106- | BlueprintZoneType :: ExternalDns ( _)
107- | BlueprintZoneType :: InternalDns ( _)
108- | BlueprintZoneType :: InternalNtp ( _) => {
109- warn ! (
110- log,
111- "unsupported zone type for expungement cleanup; \
112- not performing any cleanup";
113- ) ;
114- None
115- }
116- } ?;
117-
118- match result {
119- Err ( error) => {
120- warn ! (
121- log, "failed to clean up expunged zone" ;
122- "error" => #%error,
123- ) ;
124- Some ( error)
125- }
126- Ok ( ( ) ) => {
127- info ! ( log, "successfully cleaned up after expunged zone" ) ;
128- None
129- }
57+ let errors = cockroach_cleanups
58+ . chain ( clean_up_expunged_nexus_zones ( opctx, datastore, blueprint) )
59+ . chain ( clean_up_expunged_oximeter_zones ( opctx, datastore, blueprint) )
60+ . filter_map ( async |( log, result) | match result {
61+ Ok ( ( ) ) => {
62+ info ! ( log, "successfully cleaned up after expunged zone" ) ;
63+ None
64+ }
65+ Err ( error) => {
66+ warn ! (
67+ log, "failed to clean up expunged zone" ;
68+ InlineErrorChain :: new( & * error) ,
69+ ) ;
70+ Some ( error)
13071 }
13172 } )
132- . collect ( )
73+ . collect :: < Vec < _ > > ( )
13374 . await ;
13475
13576 if errors. is_empty ( ) { Ok ( ( ) ) } else { Err ( errors) }
13677}
13778
79+ fn make_cleanup_logger (
80+ opctx : & OpContext ,
81+ sled_id : SledUuid ,
82+ zone_id : OmicronZoneUuid ,
83+ zone_kind : ZoneKind ,
84+ ) -> Logger {
85+ opctx. log . new ( slog:: o!(
86+ "sled_id" => sled_id. to_string( ) ,
87+ "zone_id" => zone_id. to_string( ) ,
88+ "zone_type" => zone_kind. report_str( ) ,
89+ ) )
90+ }
91+
92+ fn clean_up_expunged_nexus_zones < ' a > (
93+ opctx : & ' a OpContext ,
94+ datastore : & ' a DataStore ,
95+ blueprint : & ' a Blueprint ,
96+ ) -> impl Stream < Item = ( Logger , anyhow:: Result < ( ) > ) > + ' a {
97+ let zones_to_clean_up = blueprint
98+ . all_omicron_zones ( BlueprintZoneDisposition :: is_ready_for_cleanup)
99+ . filter ( |( _sled_id, zone) | zone. zone_type . is_nexus ( ) ) ;
100+
101+ stream:: iter ( zones_to_clean_up) . then ( move |( sled_id, zone) | async move {
102+ let zone_kind = zone. zone_type . kind ( ) ;
103+ assert_eq ! ( zone_kind, ZoneKind :: Nexus ) ;
104+
105+ let log = make_cleanup_logger ( opctx, sled_id, zone. id , zone_kind) ;
106+ let result = datastore
107+ . database_nexus_access_delete ( & opctx, zone. id )
108+ . await
109+ . map_err ( |err| anyhow:: anyhow!( err) ) ;
110+
111+ ( log, result)
112+ } )
113+ }
114+
115+ fn clean_up_expunged_cockroach_zones < ' a , R : CleanupResolver > (
116+ opctx : & ' a OpContext ,
117+ datastore : & ' a DataStore ,
118+ resolver : & ' a R ,
119+ blueprint : & ' a Blueprint ,
120+ ) -> impl Stream < Item = ( Logger , anyhow:: Result < ( ) > ) > + ' a {
121+ let zones_to_clean_up = blueprint
122+ . all_omicron_zones ( BlueprintZoneDisposition :: is_ready_for_cleanup)
123+ . filter ( |( _sled_id, zone) | zone. zone_type . is_cockroach ( ) ) ;
124+
125+ stream:: iter ( zones_to_clean_up) . then ( move |( sled_id, zone) | async move {
126+ let zone_kind = zone. zone_type . kind ( ) ;
127+ assert_eq ! ( zone_kind, ZoneKind :: CockroachDb ) ;
128+
129+ let log = make_cleanup_logger ( opctx, sled_id, zone. id , zone_kind) ;
130+ let result = decommission_cockroachdb_node (
131+ opctx, datastore, resolver, zone. id , & log,
132+ )
133+ . await ;
134+
135+ ( log, result)
136+ } )
137+ }
138+
139+ fn clean_up_expunged_oximeter_zones < ' a > (
140+ opctx : & ' a OpContext ,
141+ datastore : & ' a DataStore ,
142+ blueprint : & ' a Blueprint ,
143+ ) -> impl Stream < Item = ( Logger , anyhow:: Result < ( ) > ) > + ' a {
144+ let zones_to_clean_up = blueprint
145+ . all_omicron_zones ( BlueprintZoneDisposition :: is_ready_for_cleanup)
146+ . filter ( |( _sled_id, zone) | zone. zone_type . is_oximeter ( ) ) ;
147+
148+ stream:: iter ( zones_to_clean_up) . then ( move |( sled_id, zone) | async move {
149+ let zone_kind = zone. zone_type . kind ( ) ;
150+ assert_eq ! ( zone_kind, ZoneKind :: Oximeter ) ;
151+
152+ let log = make_cleanup_logger ( opctx, sled_id, zone. id , zone_kind) ;
153+ let result = oximeter_cleanup ( opctx, datastore, zone. id , & log) . await ;
154+
155+ ( log, result)
156+ } )
157+ }
158+
138159async fn oximeter_cleanup (
139160 opctx : & OpContext ,
140161 datastore : & DataStore ,
@@ -307,15 +328,14 @@ mod test {
307328 use httptest:: responders:: { json_encoded, status_code} ;
308329 use nexus_test_utils_macros:: nexus_test;
309330 use nexus_types:: deployment:: {
310- BlueprintZoneImageSource , blueprint_zone_type,
331+ BlueprintZoneConfig , BlueprintZoneImageSource , BlueprintZoneType ,
332+ blueprint_zone_type,
311333 } ;
312334 use omicron_common:: api:: external:: Generation ;
313335 use omicron_common:: zpool_name:: ZpoolName ;
314336 use omicron_uuid_kinds:: OmicronZoneUuid ;
315- use omicron_uuid_kinds:: SledUuid ;
316337 use omicron_uuid_kinds:: ZpoolUuid ;
317338 use sled_agent_types:: inventory:: OmicronZoneDataset ;
318- use std:: iter;
319339 use uuid:: Uuid ;
320340
321341 type ControlPlaneTestContext =
@@ -325,11 +345,6 @@ mod test {
325345 async fn test_clean_up_cockroach_zones (
326346 cptestctx : & ControlPlaneTestContext ,
327347 ) {
328- // The whole point of this test is to check that we send decommissioning
329- // requests; enable that. (See the real `clean_up_expunged_zones()` for
330- // more context.)
331- let decommission_cockroach = true ;
332-
333348 // Test setup boilerplate.
334349 let nexus = & cptestctx. server . server_context ( ) . nexus ;
335350 let datastore = nexus. datastore ( ) ;
@@ -339,7 +354,6 @@ mod test {
339354 ) ;
340355
341356 // Construct the cockroach zone we're going to try to clean up.
342- let any_sled_id = SledUuid :: new_v4 ( ) ;
343357 let crdb_zone = BlueprintZoneConfig {
344358 disposition : BlueprintZoneDisposition :: Expunged {
345359 as_of_generation : Generation :: new ( ) ,
@@ -379,12 +393,12 @@ mod test {
379393 // otherwise succeed, without attempting to contact our mock admin
380394 // server. (We don't have a good way to confirm the warning was logged,
381395 // so we'll just check for an Ok return and no contact to mock_admin.)
382- clean_up_expunged_zones_impl (
396+ decommission_cockroachdb_node (
383397 & opctx,
384398 datastore,
385399 & fake_resolver,
386- decommission_cockroach ,
387- iter :: once ( ( any_sled_id , & crdb_zone ) ) ,
400+ crdb_zone . id ,
401+ & opctx . log ,
388402 )
389403 . await
390404 . expect ( "unknown node ID: no cleanup" ) ;
@@ -426,12 +440,12 @@ mod test {
426440 ) ;
427441 } ;
428442 add_decommission_expecation ( & mut mock_admin) ;
429- clean_up_expunged_zones_impl (
443+ decommission_cockroachdb_node (
430444 & opctx,
431445 datastore,
432446 & fake_resolver,
433- decommission_cockroach ,
434- iter :: once ( ( any_sled_id , & crdb_zone ) ) ,
447+ crdb_zone . id ,
448+ & opctx . log ,
435449 )
436450 . await
437451 . expect ( "decommissioned test node" ) ;
@@ -458,17 +472,15 @@ mod test {
458472 add_decommission_failure_expecation ( & mut mock_bad2) ;
459473 let mut fake_resolver =
460474 FixedResolver ( vec ! [ mock_bad1. addr( ) , mock_bad2. addr( ) ] ) ;
461- let mut err = clean_up_expunged_zones_impl (
475+ let err = decommission_cockroachdb_node (
462476 & opctx,
463477 datastore,
464478 & fake_resolver,
465- decommission_cockroach ,
466- iter :: once ( ( any_sled_id , & crdb_zone ) ) ,
479+ crdb_zone . id ,
480+ & opctx . log ,
467481 )
468482 . await
469483 . expect_err ( "no successful response should result in failure" ) ;
470- assert_eq ! ( err. len( ) , 1 ) ;
471- let err = err. pop ( ) . unwrap ( ) ;
472484 assert_eq ! (
473485 err. to_string( ) ,
474486 format!(
@@ -487,12 +499,12 @@ mod test {
487499 add_decommission_failure_expecation ( & mut mock_bad2) ;
488500 add_decommission_expecation ( & mut mock_admin) ;
489501 fake_resolver. 0 . push ( mock_admin. addr ( ) ) ;
490- clean_up_expunged_zones_impl (
502+ decommission_cockroachdb_node (
491503 & opctx,
492504 datastore,
493505 & fake_resolver,
494- decommission_cockroach ,
495- iter :: once ( ( any_sled_id , & crdb_zone ) ) ,
506+ crdb_zone . id ,
507+ & opctx . log ,
496508 )
497509 . await
498510 . expect ( "decommissioned test node" ) ;
0 commit comments