@@ -69,6 +69,7 @@ use ic_utils_thread::{JoinOnDrop, deallocator_thread::DeallocatorThread};
6969use ic_wasm_types:: ModuleLoadingStatus ;
7070use prometheus:: { Histogram , HistogramVec , IntCounter , IntCounterVec , IntGauge , IntGaugeVec } ;
7171use prost:: Message ;
72+ use std:: cmp:: min;
7273use std:: convert:: { From , TryFrom } ;
7374use std:: fs:: File ;
7475use std:: fs:: OpenOptions ;
@@ -116,6 +117,10 @@ const CRITICAL_ERROR_REPLICATED_STATE_ALTERED_AFTER_CHECKPOINT: &str =
116117/// How long to keep archived and diverged states.
117118const ARCHIVED_DIVERGED_CHECKPOINT_MAX_AGE : Duration = Duration :: from_secs ( 30 * 24 * 60 * 60 ) ; // 30 days
118119
120+ /// The maximum number of consecutive rounds for which the optimization of
121+ /// skipping state cloning and certification metadata computation triggers.
122+ const MAX_CONSECUTIVE_ROUNDS_WITHOUT_STATE_CLONING : u64 = 10 ;
123+
119124/// Write an overlay file this many rounds before each checkpoint.
120125pub const NUM_ROUNDS_BEFORE_CHECKPOINT_TO_WRITE_OVERLAY : u64 = 50 ;
121126
@@ -173,6 +178,8 @@ pub struct StateManagerMetrics {
173178 merge_metrics : MergeMetrics ,
174179 latest_hash_tree_size : IntGauge ,
175180 latest_hash_tree_max_index : IntGauge ,
181+ latest_subnet_certified_height : IntGauge ,
182+ no_state_clone_count : IntCounter ,
176183 tip_hash_count : IntCounter ,
177184}
178185
@@ -459,6 +466,16 @@ impl StateManagerMetrics {
459466 "Largest index in the latest hash tree." ,
460467 ) ;
461468
469+ let latest_subnet_certified_height = metrics_registry. int_gauge (
470+ "state_manager_latest_subnet_certified_height" ,
471+ "Height of the latest validated certification." ,
472+ ) ;
473+
474+ let no_state_clone_count = metrics_registry. int_counter (
475+ "state_manager_no_state_clone_count" ,
476+ "Number of heights whose states were not cloned and not stored by this node." ,
477+ ) ;
478+
462479 let tip_hash_count = metrics_registry. int_counter (
463480 "state_manager_tip_hash_count" ,
464481 "Number of tip heights whose state snapshot was not stored by this node and whose state hash was computed by this node." ,
@@ -489,6 +506,8 @@ impl StateManagerMetrics {
489506 merge_metrics : MergeMetrics :: new ( metrics_registry) ,
490507 latest_hash_tree_size,
491508 latest_hash_tree_max_index,
509+ latest_subnet_certified_height,
510+ no_state_clone_count,
492511 tip_hash_count,
493512 }
494513 }
@@ -916,6 +935,7 @@ pub struct StateManagerImpl {
916935 // requested quite often and this causes high contention on the lock.
917936 latest_state_height : AtomicU64 ,
918937 latest_certified_height : AtomicU64 ,
938+ latest_subnet_certified_height : AtomicU64 ,
919939 persist_metadata_guard : Arc < Mutex < ( ) > > ,
920940 tip_channel : Sender < TipRequest > ,
921941 _tip_thread_handle : JoinOnDrop < ( ) > ,
@@ -1478,6 +1498,7 @@ impl StateManagerImpl {
14781498
14791499 let latest_state_height = AtomicU64 :: new ( 0 ) ;
14801500 let latest_certified_height = AtomicU64 :: new ( 0 ) ;
1501+ let latest_subnet_certified_height = AtomicU64 :: new ( 0 ) ;
14811502
14821503 let initial_snapshot = Snapshot {
14831504 height : Self :: INITIAL_STATE_HEIGHT ,
@@ -1592,6 +1613,7 @@ impl StateManagerImpl {
15921613 deallocator_thread,
15931614 latest_state_height,
15941615 latest_certified_height,
1616+ latest_subnet_certified_height,
15951617 persist_metadata_guard,
15961618 tip_channel,
15971619 _tip_thread_handle,
@@ -1939,8 +1961,8 @@ impl StateManagerImpl {
19391961 } else {
19401962 info ! (
19411963 self . log,
1942- "The previous certification metadata at height {} has been removed . This can happen when the replica \
1943- syncs a newer state concurrently and removes the states below.",
1964+ "The previous certification metadata at height {} are not available . This can happen when the replica \
1965+ (i) catches up or (ii) syncs a newer state concurrently and removes the states below.",
19441966 prev_height,
19451967 ) ;
19461968 }
@@ -2688,10 +2710,6 @@ impl StateManager for StateManagerImpl {
26882710 } else {
26892711 std:: mem:: drop ( states) ;
26902712
2691- if tip_height != Self :: INITIAL_STATE_HEIGHT {
2692- fatal ! ( self . log, "Bug: missing tip metadata @{}" , tip_height) ;
2693- }
2694-
26952713 let mut tip_certification_metadata =
26962714 Self :: compute_certification_metadata ( & self . metrics , & self . log , & tip)
26972715 . unwrap_or_else ( |err| {
@@ -2987,9 +3005,9 @@ impl StateManager for StateManagerImpl {
29873005 hash, certification. signed. content. hash
29883006 ) ;
29893007 }
3008+
29903009 let latest_certified =
29913010 update_latest_height ( & self . latest_certified_height , certification. height ) ;
2992-
29933011 self . metrics
29943012 . latest_certified_height
29953013 . set ( latest_certified as i64 ) ;
@@ -3107,9 +3125,15 @@ impl StateManager for StateManagerImpl {
31073125
31083126 /// Variant of `remove_states_below()` that only removes states committed with
31093127 /// partial certification scope.
3128+ /// The specified `requested_height` is expected to be the *latest certified height*
3129+ /// of the subnet, i.e., the latest height for which a valid certification is available
3130+ /// to consensus.
3131+ /// There are no guarantees for future heights in `extra_heights_to_keep`
3132+ /// w.r.t. the height of the latest state snapshot stored by the state manager, i.e.,
3133+ /// for heights greater than `self.latest_state_height`.
31103134 ///
31113135 /// The following states are NOT removed:
3112- /// * Any state with height >= requested_height
3136+ /// * Any state with height >= min( requested_height, latest state height)
31133137 /// * Checkpoint heights
31143138 /// * The latest state
31153139 /// * The latest certified state
@@ -3126,6 +3150,14 @@ impl StateManager for StateManagerImpl {
31263150 . with_label_values ( & [ "remove_inmemory_states_below" ] )
31273151 . start_timer ( ) ;
31283152
3153+ let latest_subnet_certified_height =
3154+ update_latest_height ( & self . latest_subnet_certified_height , requested_height) ;
3155+ self . metrics
3156+ . latest_subnet_certified_height
3157+ . set ( latest_subnet_certified_height as i64 ) ;
3158+
3159+ let requested_height = min ( requested_height, self . latest_state_height ( ) ) ;
3160+
31293161 // The latest state must be kept.
31303162 let latest_state_height = self . latest_state_height ( ) ;
31313163 let oldest_height_to_keep = latest_state_height
@@ -3184,10 +3216,6 @@ impl StateManager for StateManagerImpl {
31843216 . with_label_values ( & [ "commit_and_certify" ] )
31853217 . start_timer ( ) ;
31863218
3187- self . metrics
3188- . tip_handler_queue_length
3189- . set ( self . tip_channel . len ( ) as i64 ) ;
3190-
31913219 self . populate_extra_metadata ( & mut state, height) ;
31923220
31933221 if let CertificationScope :: Metadata = scope {
@@ -3206,6 +3234,50 @@ impl StateManager for StateManagerImpl {
32063234 }
32073235 }
32083236
3237+ let assert_tip_is_none = |states : & SharedState | {
3238+ // The following assert validates that we don't have two clients
3239+ // modifying TIP at the same time and that each commit_and_certify()
3240+ // is preceded by a call to take_tip().
3241+ if let Some ( ( tip_height, _) ) = & states. tip {
3242+ fatal ! (
3243+ self . log,
3244+ "Attempt to commit state not borrowed from this StateManager, height = {}, tip_height = {}" ,
3245+ height,
3246+ tip_height,
3247+ ) ;
3248+ }
3249+ } ;
3250+
3251+ // If the node is catching up (`height.get() < latest_subnet_certified_height`)
3252+ // and this is not a checkpoint height (`matches!(scope, CertificationScope::Metadata)`),
3253+ // then we do not clone, do not hash, and do not store the state and certification metadata.
3254+ // This optimization is skipped every `MAX_CONSECUTIVE_ROUNDS_WITHOUT_STATE_CLONING` heights
3255+ // so that we always have a reasonably "recent" state snapshot and
3256+ // its certification metadata available.
3257+ let latest_subnet_certified_height =
3258+ self . latest_subnet_certified_height . load ( Ordering :: Relaxed ) ;
3259+ if matches ! ( scope, CertificationScope :: Metadata )
3260+ && height. get ( ) < latest_subnet_certified_height
3261+ && !height
3262+ . get ( )
3263+ . is_multiple_of ( MAX_CONSECUTIVE_ROUNDS_WITHOUT_STATE_CLONING )
3264+ {
3265+ let mut states = self . states . write ( ) ;
3266+ #[ cfg( debug_assertions) ]
3267+ check_certifications_metadata_snapshots_and_states_metadata_are_consistent ( & states) ;
3268+
3269+ assert_tip_is_none ( & states) ;
3270+
3271+ self . metrics . no_state_clone_count . inc ( ) ;
3272+
3273+ states. tip = Some ( ( height, state) ) ;
3274+ return ;
3275+ }
3276+
3277+ self . metrics
3278+ . tip_handler_queue_length
3279+ . set ( self . tip_channel . len ( ) as i64 ) ;
3280+
32093281 let mut state_metadata_and_compute_manifest_request: Option < ( StateMetadata , TipRequest ) > =
32103282 None ;
32113283 let mut follow_up_tip_requests = Vec :: new ( ) ;
@@ -3254,17 +3326,7 @@ impl StateManager for StateManagerImpl {
32543326 #[ cfg( debug_assertions) ]
32553327 check_certifications_metadata_snapshots_and_states_metadata_are_consistent ( & states) ;
32563328
3257- // The following assert validates that we don't have two clients
3258- // modifying TIP at the same time and that each commit_and_certify()
3259- // is preceded by a call to take_tip().
3260- if let Some ( ( tip_height, _) ) = & states. tip {
3261- fatal ! (
3262- self . log,
3263- "Attempt to commit state not borrowed from this StateManager, height = {}, tip_height = {}" ,
3264- height,
3265- tip_height,
3266- ) ;
3267- }
3329+ assert_tip_is_none ( & states) ;
32683330
32693331 // It's possible that we already computed this state before. We
32703332 // validate that hashes agree to spot bugs causing non-determinism as
@@ -3996,6 +4058,27 @@ pub mod testing {
39964058
39974059 /// Testing only: Wait till deallocation queue is empty.
39984060 fn flush_deallocation_channel ( & self ) ;
4061+
4062+ /// Testing only: Returns heights in `states.snapshots`.
4063+ fn state_snapshot_heights ( & self ) -> Vec < Height > ;
4064+
4065+ /// Testing only: Returns state at a given height in `states.snapshots`.
4066+ fn state_snapshot ( & self , height : Height ) -> Arc < ReplicatedState > ;
4067+
4068+ /// Testing only: Returns heights in `states.certifications_metadata`.
4069+ fn certifications_metadata_heights ( & self ) -> Vec < Height > ;
4070+
4071+ /// Testing only: Returns hash tree at a given height in `states.certifications_metadata`.
4072+ fn certifications_metadata_hash_tree ( & self , height : Height ) -> Option < Arc < HashTree > > ;
4073+
4074+ /// Testing only: Returns state hash at a given height in `states.certifications_metadata`.
4075+ fn certifications_metadata_state_hash ( & self , height : Height ) -> CryptoHash ;
4076+
4077+ /// Testing only: Returns certification at a given height in `states.certifications_metadata`.
4078+ fn certifications_metadata_certification ( & self , height : Height ) -> Option < Certification > ;
4079+
4080+ /// Testing only: Returns `latest_subnet_certified_height`.
4081+ fn latest_subnet_certified_height ( & self ) -> u64 ;
39994082 }
40004083
40014084 impl StateManagerTesting for StateManagerImpl {
@@ -4023,5 +4106,61 @@ pub mod testing {
40234106 fn flush_deallocation_channel ( & self ) {
40244107 self . deallocator_thread . flush_deallocation_channel ( ) ;
40254108 }
4109+
4110+ fn state_snapshot_heights ( & self ) -> Vec < Height > {
4111+ let states = self . states . read ( ) ;
4112+ states. snapshots . iter ( ) . map ( |s| s. height ) . collect ( )
4113+ }
4114+
4115+ fn state_snapshot ( & self , height : Height ) -> Arc < ReplicatedState > {
4116+ let states = self . states . read ( ) ;
4117+ states
4118+ . snapshots
4119+ . iter ( )
4120+ . find ( |s| s. height == height)
4121+ . expect ( "Did not find state at given height" )
4122+ . state
4123+ . clone ( )
4124+ }
4125+
4126+ fn certifications_metadata_heights ( & self ) -> Vec < Height > {
4127+ let states = self . states . read ( ) ;
4128+ states. certifications_metadata . keys ( ) . cloned ( ) . collect ( )
4129+ }
4130+
4131+ fn certifications_metadata_hash_tree ( & self , height : Height ) -> Option < Arc < HashTree > > {
4132+ let states = self . states . read ( ) ;
4133+ states
4134+ . certifications_metadata
4135+ . get ( & height)
4136+ . expect ( "Did not find metadata at given height" )
4137+ . hash_tree
4138+ . clone ( )
4139+ . map ( |( hash_tree, _) | hash_tree)
4140+ }
4141+
4142+ fn certifications_metadata_state_hash ( & self , height : Height ) -> CryptoHash {
4143+ let states = self . states . read ( ) ;
4144+ states
4145+ . certifications_metadata
4146+ . get ( & height)
4147+ . expect ( "Did not find metadata at given height" )
4148+ . certified_state_hash
4149+ . clone ( )
4150+ }
4151+
4152+ fn certifications_metadata_certification ( & self , height : Height ) -> Option < Certification > {
4153+ let states = self . states . read ( ) ;
4154+ states
4155+ . certifications_metadata
4156+ . get ( & height)
4157+ . expect ( "Did not find metadata at given height" )
4158+ . certification
4159+ . clone ( )
4160+ }
4161+
4162+ fn latest_subnet_certified_height ( & self ) -> u64 {
4163+ self . latest_subnet_certified_height . load ( Ordering :: Relaxed )
4164+ }
40264165 }
40274166}
0 commit comments