@@ -12,26 +12,21 @@ use crate::common_sp_update::PrecheckError;
1212use crate :: common_sp_update:: PrecheckStatus ;
1313use crate :: common_sp_update:: error_means_caboose_is_invalid;
1414use crate :: mgs_clients:: GatewayClientError ;
15+ use crate :: rot_updater:: WAIT_FOR_BOOT_INFO_TIMEOUT ;
16+ use crate :: rot_updater:: wait_for_boot_info;
1517use futures:: FutureExt ;
1618use futures:: future:: BoxFuture ;
1719use gateway_client:: SpComponent ;
18- use gateway_client:: types:: GetRotBootInfoParams ;
1920use gateway_client:: types:: RotImageError ;
2021use gateway_client:: types:: RotState ;
2122use gateway_client:: types:: SpComponentFirmwareSlot ;
2223use gateway_client:: types:: SpType ;
23- use gateway_messages:: RotBootInfo ;
2424use nexus_types:: deployment:: PendingMgsUpdate ;
2525use nexus_types:: deployment:: PendingMgsUpdateRotBootloaderDetails ;
2626use slog:: Logger ;
27- use slog:: { debug, error, info } ;
27+ use slog:: { debug, error} ;
2828use slog_error_chain:: InlineErrorChain ;
2929use std:: time:: Duration ;
30- use std:: time:: Instant ;
31-
32- const WAIT_FOR_BOOT_INFO_TIMEOUT : Duration = Duration :: from_secs ( 120 ) ;
33-
34- const WAIT_FOR_BOOT_INFO_INTERVAL : Duration = Duration :: from_secs ( 10 ) ;
3530
3631pub struct ReconfiguratorRotBootloaderUpdater {
3732 details : PendingMgsUpdateRotBootloaderDetails ,
@@ -193,10 +188,6 @@ impl SpComponentUpdateHelperImpl for ReconfiguratorRotBootloaderUpdater {
193188
194189 // We now retrieve boot info from the RoT to verify the reset
195190 // has completed and signature checks done.
196- debug ! (
197- log,
198- "attempting to retrieve boot info to verify image validity"
199- ) ;
200191 let stage0next_error = wait_for_stage0_next_image_check (
201192 log,
202193 mgs_clients,
@@ -252,7 +243,10 @@ impl SpComponentUpdateHelperImpl for ReconfiguratorRotBootloaderUpdater {
252243 } )
253244 . await ?;
254245
255- debug ! ( log, "attempting to reset device to set to new RoT bootloader version" ) ;
246+ debug ! (
247+ log,
248+ "attempting to reset the device to set a new RoT bootloader version" ,
249+ ) ;
256250 mgs_clients
257251 . try_all_serially ( log, move |mgs_client| async move {
258252 mgs_client
@@ -265,96 +259,62 @@ impl SpComponentUpdateHelperImpl for ReconfiguratorRotBootloaderUpdater {
265259 } )
266260 . await ?;
267261
262+ // We wait for boot info to ensure a successful reset
263+ wait_for_boot_info (
264+ log,
265+ mgs_clients,
266+ update. sp_type ,
267+ update. slot_id ,
268+ WAIT_FOR_BOOT_INFO_TIMEOUT ,
269+ )
270+ . await ?;
268271 Ok ( ( ) )
269272 }
270273 . boxed ( )
271274 }
272275}
273276
274277/// Poll the RoT asking for its boot information. This is used to check
275- /// state after RoT bootloader updates
278+ /// the state for RoT bootloader image errors after RoT is reset
276279async fn wait_for_stage0_next_image_check (
277280 log : & Logger ,
278281 mgs_clients : & mut MgsClients ,
279282 sp_type : SpType ,
280283 sp_slot : u16 ,
281284 timeout : Duration ,
282285) -> Result < Option < RotImageError > , PostUpdateError > {
283- let before = Instant :: now ( ) ;
284- loop {
285- match mgs_clients
286- . try_all_serially ( log, |mgs_client| async move {
287- mgs_client
288- . sp_rot_boot_info (
289- sp_type,
290- sp_slot,
291- SpComponent :: ROT . const_as_str ( ) ,
292- & GetRotBootInfoParams {
293- version : RotBootInfo :: HIGHEST_KNOWN_VERSION ,
294- } ,
295- )
296- . await
297- } )
298- . await
299- {
300- Ok ( state) => match state. into_inner ( ) {
301- // The minimum we will ever return is v3.
302- // Additionally, V2 does not report image errors, so we cannot
303- // know with certainty if a signature check came back with errors
304- RotState :: V2 { .. } => {
305- let error = "unexpected RoT version: 2" . to_string ( ) ;
306- error ! (
307- log,
308- "failed to get RoT boot info" ;
309- "error" => & error
310- ) ;
311- return Err ( PostUpdateError :: FatalError { error } ) ;
312- }
313- RotState :: V3 { stage0next_error, .. } => {
314- return Ok ( stage0next_error) ;
315- }
316- // The RoT is probably still booting
317- RotState :: CommunicationFailed { message } => {
318- if before. elapsed ( ) >= timeout {
319- error ! (
320- log,
321- "failed to get RoT boot info" ;
322- "error" => %message
323- ) ;
324- return Err ( PostUpdateError :: FatalError {
325- error : message,
326- } ) ;
327- }
328-
329- info ! (
330- log,
331- "failed getting RoT boot info (will retry)" ;
332- "error" => %message,
333- ) ;
334- tokio:: time:: sleep ( WAIT_FOR_BOOT_INFO_INTERVAL ) . await ;
335- }
336- } ,
337- // The RoT might still be booting
338- Err ( error) => {
339- let e = InlineErrorChain :: new ( & error) ;
340- if before. elapsed ( ) >= timeout {
341- error ! (
342- log,
343- "failed to get RoT boot info" ;
344- & e,
345- ) ;
346- return Err ( PostUpdateError :: FatalError {
347- error : e. to_string ( ) ,
348- } ) ;
349- }
350-
351- info ! (
286+ debug ! ( log, "attempting to verify image validity" ) ;
287+ match wait_for_boot_info ( log, mgs_clients, sp_type, sp_slot, timeout) . await
288+ {
289+ Ok ( state) => match state {
290+ // The minimum we will ever return is v3.
291+ // Additionally, V2 does not report image errors, so we cannot
292+ // know with certainty if a signature check came back with errors
293+ RotState :: V2 { .. } => {
294+ let error = "unexpected RoT version: 2" . to_string ( ) ;
295+ error ! (
352296 log,
353- "failed getting RoT boot info (will retry) " ;
354- e ,
297+ "failed to get RoT boot info" ;
298+ "error" => & error
355299 ) ;
356- tokio :: time :: sleep ( WAIT_FOR_BOOT_INFO_INTERVAL ) . await ;
300+ return Err ( PostUpdateError :: FatalError { error } ) ;
357301 }
358- }
302+ RotState :: V3 { stage0next_error, .. } => {
303+ debug ! ( log, "successfully completed an image signature check" ) ;
304+ return Ok ( stage0next_error) ;
305+ }
306+ // This is unreachable because wait_for_boot_info loops for some
307+ // time if it encounters `CommunicationFailed`, and if it hits the
308+ // timeout, it will return an error.
309+ RotState :: CommunicationFailed { message } => {
310+ error ! (
311+ log,
312+ "failed to get RoT boot info" ;
313+ "error" => %message
314+ ) ;
315+ return Err ( PostUpdateError :: FatalError { error : message } ) ;
316+ }
317+ } ,
318+ Err ( error) => return Err ( error) ,
359319 }
360320}
0 commit comments