@@ -12,26 +12,21 @@ use crate::common_sp_update::PrecheckError;
12
12
use crate :: common_sp_update:: PrecheckStatus ;
13
13
use crate :: common_sp_update:: error_means_caboose_is_invalid;
14
14
use crate :: mgs_clients:: GatewayClientError ;
15
+ use crate :: rot_updater:: WAIT_FOR_BOOT_INFO_TIMEOUT ;
16
+ use crate :: rot_updater:: wait_for_boot_info;
15
17
use futures:: FutureExt ;
16
18
use futures:: future:: BoxFuture ;
17
19
use gateway_client:: SpComponent ;
18
- use gateway_client:: types:: GetRotBootInfoParams ;
19
20
use gateway_client:: types:: RotImageError ;
20
21
use gateway_client:: types:: RotState ;
21
22
use gateway_client:: types:: SpComponentFirmwareSlot ;
22
23
use gateway_client:: types:: SpType ;
23
- use gateway_messages:: RotBootInfo ;
24
24
use nexus_types:: deployment:: PendingMgsUpdate ;
25
25
use nexus_types:: deployment:: PendingMgsUpdateRotBootloaderDetails ;
26
26
use slog:: Logger ;
27
- use slog:: { debug, error, info } ;
27
+ use slog:: { debug, error} ;
28
28
use slog_error_chain:: InlineErrorChain ;
29
29
use std:: time:: Duration ;
30
- use std:: time:: Instant ;
31
-
32
- const WAIT_FOR_BOOT_INFO_TIMEOUT : Duration = Duration :: from_secs ( 120 ) ;
33
-
34
- const WAIT_FOR_BOOT_INFO_INTERVAL : Duration = Duration :: from_secs ( 10 ) ;
35
30
36
31
pub struct ReconfiguratorRotBootloaderUpdater {
37
32
details : PendingMgsUpdateRotBootloaderDetails ,
@@ -193,10 +188,6 @@ impl SpComponentUpdateHelperImpl for ReconfiguratorRotBootloaderUpdater {
193
188
194
189
// We now retrieve boot info from the RoT to verify the reset
195
190
// has completed and signature checks done.
196
- debug ! (
197
- log,
198
- "attempting to retrieve boot info to verify image validity"
199
- ) ;
200
191
let stage0next_error = wait_for_stage0_next_image_check (
201
192
log,
202
193
mgs_clients,
@@ -252,7 +243,10 @@ impl SpComponentUpdateHelperImpl for ReconfiguratorRotBootloaderUpdater {
252
243
} )
253
244
. await ?;
254
245
255
- debug ! ( log, "attempting to reset device to set to new RoT bootloader version" ) ;
246
+ debug ! (
247
+ log,
248
+ "attempting to reset the device to set a new RoT bootloader version" ,
249
+ ) ;
256
250
mgs_clients
257
251
. try_all_serially ( log, move |mgs_client| async move {
258
252
mgs_client
@@ -265,96 +259,62 @@ impl SpComponentUpdateHelperImpl for ReconfiguratorRotBootloaderUpdater {
265
259
} )
266
260
. await ?;
267
261
262
+ // We wait for boot info to ensure a successful reset
263
+ wait_for_boot_info (
264
+ log,
265
+ mgs_clients,
266
+ update. sp_type ,
267
+ update. slot_id ,
268
+ WAIT_FOR_BOOT_INFO_TIMEOUT ,
269
+ )
270
+ . await ?;
268
271
Ok ( ( ) )
269
272
}
270
273
. boxed ( )
271
274
}
272
275
}
273
276
274
277
/// Poll the RoT asking for its boot information. This is used to check
275
- /// state after RoT bootloader updates
278
+ /// for RoT bootloader image errors after the RoT has been reset.
276
279
async fn wait_for_stage0_next_image_check (
277
280
log : & Logger ,
278
281
mgs_clients : & mut MgsClients ,
279
282
sp_type : SpType ,
280
283
sp_slot : u16 ,
281
284
timeout : Duration ,
282
285
) -> Result < Option < RotImageError > , PostUpdateError > {
283
- let before = Instant :: now ( ) ;
284
- loop {
285
- match mgs_clients
286
- . try_all_serially ( log, |mgs_client| async move {
287
- mgs_client
288
- . sp_rot_boot_info (
289
- sp_type,
290
- sp_slot,
291
- SpComponent :: ROT . const_as_str ( ) ,
292
- & GetRotBootInfoParams {
293
- version : RotBootInfo :: HIGHEST_KNOWN_VERSION ,
294
- } ,
295
- )
296
- . await
297
- } )
298
- . await
299
- {
300
- Ok ( state) => match state. into_inner ( ) {
301
- // The minimum we will ever return is v3.
302
- // Additionally, V2 does not report image errors, so we cannot
303
- // know with certainty if a signature check came back with errors
304
- RotState :: V2 { .. } => {
305
- let error = "unexpected RoT version: 2" . to_string ( ) ;
306
- error ! (
307
- log,
308
- "failed to get RoT boot info" ;
309
- "error" => & error
310
- ) ;
311
- return Err ( PostUpdateError :: FatalError { error } ) ;
312
- }
313
- RotState :: V3 { stage0next_error, .. } => {
314
- return Ok ( stage0next_error) ;
315
- }
316
- // The RoT is probably still booting
317
- RotState :: CommunicationFailed { message } => {
318
- if before. elapsed ( ) >= timeout {
319
- error ! (
320
- log,
321
- "failed to get RoT boot info" ;
322
- "error" => %message
323
- ) ;
324
- return Err ( PostUpdateError :: FatalError {
325
- error : message,
326
- } ) ;
327
- }
328
-
329
- info ! (
330
- log,
331
- "failed getting RoT boot info (will retry)" ;
332
- "error" => %message,
333
- ) ;
334
- tokio:: time:: sleep ( WAIT_FOR_BOOT_INFO_INTERVAL ) . await ;
335
- }
336
- } ,
337
- // The RoT might still be booting
338
- Err ( error) => {
339
- let e = InlineErrorChain :: new ( & error) ;
340
- if before. elapsed ( ) >= timeout {
341
- error ! (
342
- log,
343
- "failed to get RoT boot info" ;
344
- & e,
345
- ) ;
346
- return Err ( PostUpdateError :: FatalError {
347
- error : e. to_string ( ) ,
348
- } ) ;
349
- }
350
-
351
- info ! (
286
+ debug ! ( log, "attempting to verify image validity" ) ;
287
+ match wait_for_boot_info ( log, mgs_clients, sp_type, sp_slot, timeout) . await
288
+ {
289
+ Ok ( state) => match state {
290
+ // The minimum we will ever return is v3.
291
+ // Additionally, V2 does not report image errors, so we cannot
292
+ // know with certainty if a signature check came back with errors
293
+ RotState :: V2 { .. } => {
294
+ let error = "unexpected RoT version: 2" . to_string ( ) ;
295
+ error ! (
352
296
log,
353
- "failed getting RoT boot info (will retry) " ;
354
- e ,
297
+ "failed to get RoT boot info" ;
298
+ "error" => & error
355
299
) ;
356
- tokio :: time :: sleep ( WAIT_FOR_BOOT_INFO_INTERVAL ) . await ;
300
+ return Err ( PostUpdateError :: FatalError { error } ) ;
357
301
}
358
- }
302
+ RotState :: V3 { stage0next_error, .. } => {
303
+ debug ! ( log, "successfully completed an image signature check" ) ;
304
+ return Ok ( stage0next_error) ;
305
+ }
306
+ // This should be unreachable: wait_for_boot_info keeps retrying while
307
+ // it sees `CommunicationFailed`, and once it hits the timeout it
308
+ // returns an error instead.
309
+ RotState :: CommunicationFailed { message } => {
310
+ error ! (
311
+ log,
312
+ "failed to get RoT boot info" ;
313
+ "error" => %message
314
+ ) ;
315
+ return Err ( PostUpdateError :: FatalError { error : message } ) ;
316
+ }
317
+ } ,
318
+ Err ( error) => return Err ( error) ,
359
319
}
360
320
}
0 commit comments