Skip to content

Commit ed1bed8

Browse files
authored
feat: no state cloning during catch-up (#8411)
This PR optimizes the state manager to not clone the state while the node is catching up. In more detail, this PR performs the following changes: - stores the latest certified height of the subnet (passed from consensus via calls to `StateManagerImpl::remove_inmemory_states_below`) in `StateManagerImpl`; - introduces new state manager metrics (the latest certified height of the subnet and the number of times `StateManagerImpl::commmit_and_certify` did not clone state); - does not clone state and does not compute certification metadata in `StateManagerImpl::commmit_and_certify` if the committed height is below the latest certified height of the subnet, the certification scope for the committed state is set to partial certification (the "metadata" variant), and the committed height is not divisible by 10 (i.e., we skip the optimization every 10th round; to have a "recent" certified state snapshot stored and available throughout the process of catching up).
1 parent f4b6d27 commit ed1bed8

File tree

4 files changed

+527
-33
lines changed

4 files changed

+527
-33
lines changed

rs/consensus/src/consensus/purger.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -316,11 +316,7 @@ impl Purger {
316316

317317
/// Ask state manager to purge all states below the given height
318318
fn purge_replicated_state_by_finalized_certified_height(&self, pool: &PoolReader<'_>) {
319-
let height = pool
320-
.get_finalized_tip()
321-
.context
322-
.certified_height
323-
.min(self.state_manager.latest_state_height());
319+
let height = pool.get_finalized_tip().context.certified_height;
324320

325321
let extra_heights_to_keep = get_pending_idkg_cup_heights(pool);
326322
self.state_manager
@@ -340,7 +336,8 @@ impl Purger {
340336
/// the given height can be removed.
341337
///
342338
/// Note from the [`StateManager::remove_states_below`] docs:
343-
/// * The initial state (height = 0) cannot be removed.
339+
/// * The initial state (height = 0) is not removed.
340+
/// * The latest state is not removed.
344341
/// * Some states matching the removal criteria might be kept alive. For
345342
/// example, the last fully persisted state might be preserved to
346343
/// optimize future operations.

rs/interfaces/state_manager/src/lib.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,15 @@ pub trait StateManager: StateReader {
226226
/// Notify the state manager that states committed with partial certification
227227
/// state and heights strictly less than the specified `height` can be removed, except
228228
/// for any heights provided in `extra_heights_to_keep`, which will still be retained.
229+
/// The specified `height` is expected to be the *latest certified height*
230+
/// of the subnet, i.e., the latest height for which a valid certification is available
231+
/// to consensus.
232+
/// There are no guarantees for future heights in `extra_heights_to_keep`
233+
/// w.r.t. the height of the latest state snapshot stored by the state manager.
229234
///
230235
/// Note that:
231-
/// * The initial state (height = 0) cannot be removed.
236+
/// * The initial state (height = 0) is not removed.
237+
/// * The latest state is not removed.
232238
/// * Some states matching the removal criteria might be kept alive. For
233239
/// example, the last fully persisted state might be preserved to
234240
/// optimize future operations.

rs/state_manager/src/lib.rs

Lines changed: 162 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ use ic_utils_thread::{JoinOnDrop, deallocator_thread::DeallocatorThread};
6969
use ic_wasm_types::ModuleLoadingStatus;
7070
use prometheus::{Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec};
7171
use prost::Message;
72+
use std::cmp::min;
7273
use std::convert::{From, TryFrom};
7374
use std::fs::File;
7475
use std::fs::OpenOptions;
@@ -116,6 +117,10 @@ const CRITICAL_ERROR_REPLICATED_STATE_ALTERED_AFTER_CHECKPOINT: &str =
116117
/// How long to keep archived and diverged states.
117118
const ARCHIVED_DIVERGED_CHECKPOINT_MAX_AGE: Duration = Duration::from_secs(30 * 24 * 60 * 60); // 30 days
118119

120+
/// The maximum number of consecutive rounds for which the optimization of
121+
/// skipping state cloning and certification metadata computation triggers.
122+
const MAX_CONSECUTIVE_ROUNDS_WITHOUT_STATE_CLONING: u64 = 10;
123+
119124
/// Write an overlay file this many rounds before each checkpoint.
120125
pub const NUM_ROUNDS_BEFORE_CHECKPOINT_TO_WRITE_OVERLAY: u64 = 50;
121126

@@ -173,6 +178,8 @@ pub struct StateManagerMetrics {
173178
merge_metrics: MergeMetrics,
174179
latest_hash_tree_size: IntGauge,
175180
latest_hash_tree_max_index: IntGauge,
181+
latest_subnet_certified_height: IntGauge,
182+
no_state_clone_count: IntCounter,
176183
tip_hash_count: IntCounter,
177184
}
178185

@@ -459,6 +466,16 @@ impl StateManagerMetrics {
459466
"Largest index in the latest hash tree.",
460467
);
461468

469+
let latest_subnet_certified_height = metrics_registry.int_gauge(
470+
"state_manager_latest_subnet_certified_height",
471+
"Height of the latest validated certification.",
472+
);
473+
474+
let no_state_clone_count = metrics_registry.int_counter(
475+
"state_manager_no_state_clone_count",
476+
"Number of heights whose states were not cloned and not stored by this node.",
477+
);
478+
462479
let tip_hash_count = metrics_registry.int_counter(
463480
"state_manager_tip_hash_count",
464481
"Number of tip heights whose state snapshot was not stored by this node and whose state hash was computed by this node.",
@@ -489,6 +506,8 @@ impl StateManagerMetrics {
489506
merge_metrics: MergeMetrics::new(metrics_registry),
490507
latest_hash_tree_size,
491508
latest_hash_tree_max_index,
509+
latest_subnet_certified_height,
510+
no_state_clone_count,
492511
tip_hash_count,
493512
}
494513
}
@@ -916,6 +935,7 @@ pub struct StateManagerImpl {
916935
// requested quite often and this causes high contention on the lock.
917936
latest_state_height: AtomicU64,
918937
latest_certified_height: AtomicU64,
938+
latest_subnet_certified_height: AtomicU64,
919939
persist_metadata_guard: Arc<Mutex<()>>,
920940
tip_channel: Sender<TipRequest>,
921941
_tip_thread_handle: JoinOnDrop<()>,
@@ -1478,6 +1498,7 @@ impl StateManagerImpl {
14781498

14791499
let latest_state_height = AtomicU64::new(0);
14801500
let latest_certified_height = AtomicU64::new(0);
1501+
let latest_subnet_certified_height = AtomicU64::new(0);
14811502

14821503
let initial_snapshot = Snapshot {
14831504
height: Self::INITIAL_STATE_HEIGHT,
@@ -1592,6 +1613,7 @@ impl StateManagerImpl {
15921613
deallocator_thread,
15931614
latest_state_height,
15941615
latest_certified_height,
1616+
latest_subnet_certified_height,
15951617
persist_metadata_guard,
15961618
tip_channel,
15971619
_tip_thread_handle,
@@ -1939,8 +1961,8 @@ impl StateManagerImpl {
19391961
} else {
19401962
info!(
19411963
self.log,
1942-
"The previous certification metadata at height {} has been removed. This can happen when the replica \
1943-
syncs a newer state concurrently and removes the states below.",
1964+
"The previous certification metadata at height {} are not available. This can happen when the replica \
1965+
(i) catches up or (ii) syncs a newer state concurrently and removes the states below.",
19441966
prev_height,
19451967
);
19461968
}
@@ -2688,10 +2710,6 @@ impl StateManager for StateManagerImpl {
26882710
} else {
26892711
std::mem::drop(states);
26902712

2691-
if tip_height != Self::INITIAL_STATE_HEIGHT {
2692-
fatal!(self.log, "Bug: missing tip metadata @{}", tip_height);
2693-
}
2694-
26952713
let mut tip_certification_metadata =
26962714
Self::compute_certification_metadata(&self.metrics, &self.log, &tip)
26972715
.unwrap_or_else(|err| {
@@ -2987,9 +3005,9 @@ impl StateManager for StateManagerImpl {
29873005
hash, certification.signed.content.hash
29883006
);
29893007
}
3008+
29903009
let latest_certified =
29913010
update_latest_height(&self.latest_certified_height, certification.height);
2992-
29933011
self.metrics
29943012
.latest_certified_height
29953013
.set(latest_certified as i64);
@@ -3107,9 +3125,15 @@ impl StateManager for StateManagerImpl {
31073125

31083126
/// Variant of `remove_states_below()` that only removes states committed with
31093127
/// partial certification scope.
3128+
/// The specified `requested_height` is expected to be the *latest certified height*
3129+
/// of the subnet, i.e., the latest height for which a valid certification is available
3130+
/// to consensus.
3131+
/// There are no guarantees for future heights in `extra_heights_to_keep`
3132+
/// w.r.t. the height of the latest state snapshot stored by the state manager, i.e.,
3133+
/// for heights greater than `self.latest_state_height`.
31103134
///
31113135
/// The following states are NOT removed:
3112-
/// * Any state with height >= requested_height
3136+
/// * Any state with height >= min(requested_height, latest state height)
31133137
/// * Checkpoint heights
31143138
/// * The latest state
31153139
/// * The latest certified state
@@ -3126,6 +3150,14 @@ impl StateManager for StateManagerImpl {
31263150
.with_label_values(&["remove_inmemory_states_below"])
31273151
.start_timer();
31283152

3153+
let latest_subnet_certified_height =
3154+
update_latest_height(&self.latest_subnet_certified_height, requested_height);
3155+
self.metrics
3156+
.latest_subnet_certified_height
3157+
.set(latest_subnet_certified_height as i64);
3158+
3159+
let requested_height = min(requested_height, self.latest_state_height());
3160+
31293161
// The latest state must be kept.
31303162
let latest_state_height = self.latest_state_height();
31313163
let oldest_height_to_keep = latest_state_height
@@ -3184,10 +3216,6 @@ impl StateManager for StateManagerImpl {
31843216
.with_label_values(&["commit_and_certify"])
31853217
.start_timer();
31863218

3187-
self.metrics
3188-
.tip_handler_queue_length
3189-
.set(self.tip_channel.len() as i64);
3190-
31913219
self.populate_extra_metadata(&mut state, height);
31923220

31933221
if let CertificationScope::Metadata = scope {
@@ -3206,6 +3234,50 @@ impl StateManager for StateManagerImpl {
32063234
}
32073235
}
32083236

3237+
let assert_tip_is_none = |states: &SharedState| {
3238+
// The following assert validates that we don't have two clients
3239+
// modifying TIP at the same time and that each commit_and_certify()
3240+
// is preceded by a call to take_tip().
3241+
if let Some((tip_height, _)) = &states.tip {
3242+
fatal!(
3243+
self.log,
3244+
"Attempt to commit state not borrowed from this StateManager, height = {}, tip_height = {}",
3245+
height,
3246+
tip_height,
3247+
);
3248+
}
3249+
};
3250+
3251+
// If the node is catching up (`height.get() < latest_subnet_certified_height`)
3252+
// and this is not a checkpoint height (`matches!(scope, CertificationScope::Metadata)`),
3253+
// then we do not clone, do not hash, and do not store the state and certification metadata.
3254+
// This optimization is skipped every `MAX_CONSECUTIVE_ROUNDS_WITHOUT_STATE_CLONING` heights
3255+
// so that we always have a reasonably "recent" state snapshot and
3256+
// its certification metadata available.
3257+
let latest_subnet_certified_height =
3258+
self.latest_subnet_certified_height.load(Ordering::Relaxed);
3259+
if matches!(scope, CertificationScope::Metadata)
3260+
&& height.get() < latest_subnet_certified_height
3261+
&& !height
3262+
.get()
3263+
.is_multiple_of(MAX_CONSECUTIVE_ROUNDS_WITHOUT_STATE_CLONING)
3264+
{
3265+
let mut states = self.states.write();
3266+
#[cfg(debug_assertions)]
3267+
check_certifications_metadata_snapshots_and_states_metadata_are_consistent(&states);
3268+
3269+
assert_tip_is_none(&states);
3270+
3271+
self.metrics.no_state_clone_count.inc();
3272+
3273+
states.tip = Some((height, state));
3274+
return;
3275+
}
3276+
3277+
self.metrics
3278+
.tip_handler_queue_length
3279+
.set(self.tip_channel.len() as i64);
3280+
32093281
let mut state_metadata_and_compute_manifest_request: Option<(StateMetadata, TipRequest)> =
32103282
None;
32113283
let mut follow_up_tip_requests = Vec::new();
@@ -3254,17 +3326,7 @@ impl StateManager for StateManagerImpl {
32543326
#[cfg(debug_assertions)]
32553327
check_certifications_metadata_snapshots_and_states_metadata_are_consistent(&states);
32563328

3257-
// The following assert validates that we don't have two clients
3258-
// modifying TIP at the same time and that each commit_and_certify()
3259-
// is preceded by a call to take_tip().
3260-
if let Some((tip_height, _)) = &states.tip {
3261-
fatal!(
3262-
self.log,
3263-
"Attempt to commit state not borrowed from this StateManager, height = {}, tip_height = {}",
3264-
height,
3265-
tip_height,
3266-
);
3267-
}
3329+
assert_tip_is_none(&states);
32683330

32693331
// It's possible that we already computed this state before. We
32703332
// validate that hashes agree to spot bugs causing non-determinism as
@@ -3996,6 +4058,27 @@ pub mod testing {
39964058

39974059
/// Testing only: Wait till deallocation queue is empty.
39984060
fn flush_deallocation_channel(&self);
4061+
4062+
/// Testing only: Returns heights in `states.snapshots`.
4063+
fn state_snapshot_heights(&self) -> Vec<Height>;
4064+
4065+
/// Testing only: Returns state at a given height in `states.snapshots`.
4066+
fn state_snapshot(&self, height: Height) -> Arc<ReplicatedState>;
4067+
4068+
/// Testing only: Returns heights in `states.certifications_metadata`.
4069+
fn certifications_metadata_heights(&self) -> Vec<Height>;
4070+
4071+
/// Testing only: Returns hash tree at a given height in `states.certifications_metadata`.
4072+
fn certifications_metadata_hash_tree(&self, height: Height) -> Option<Arc<HashTree>>;
4073+
4074+
/// Testing only: Returns state hash at a given height in `states.certifications_metadata`.
4075+
fn certifications_metadata_state_hash(&self, height: Height) -> CryptoHash;
4076+
4077+
/// Testing only: Returns certification at a given height in `states.certifications_metadata`.
4078+
fn certifications_metadata_certification(&self, height: Height) -> Option<Certification>;
4079+
4080+
/// Testing only: Returns `latest_subnet_certified_height`.
4081+
fn latest_subnet_certified_height(&self) -> u64;
39994082
}
40004083

40014084
impl StateManagerTesting for StateManagerImpl {
@@ -4023,5 +4106,61 @@ pub mod testing {
40234106
fn flush_deallocation_channel(&self) {
40244107
self.deallocator_thread.flush_deallocation_channel();
40254108
}
4109+
4110+
fn state_snapshot_heights(&self) -> Vec<Height> {
4111+
let states = self.states.read();
4112+
states.snapshots.iter().map(|s| s.height).collect()
4113+
}
4114+
4115+
fn state_snapshot(&self, height: Height) -> Arc<ReplicatedState> {
4116+
let states = self.states.read();
4117+
states
4118+
.snapshots
4119+
.iter()
4120+
.find(|s| s.height == height)
4121+
.expect("Did not find state at given height")
4122+
.state
4123+
.clone()
4124+
}
4125+
4126+
fn certifications_metadata_heights(&self) -> Vec<Height> {
4127+
let states = self.states.read();
4128+
states.certifications_metadata.keys().cloned().collect()
4129+
}
4130+
4131+
fn certifications_metadata_hash_tree(&self, height: Height) -> Option<Arc<HashTree>> {
4132+
let states = self.states.read();
4133+
states
4134+
.certifications_metadata
4135+
.get(&height)
4136+
.expect("Did not find metadata at given height")
4137+
.hash_tree
4138+
.clone()
4139+
.map(|(hash_tree, _)| hash_tree)
4140+
}
4141+
4142+
fn certifications_metadata_state_hash(&self, height: Height) -> CryptoHash {
4143+
let states = self.states.read();
4144+
states
4145+
.certifications_metadata
4146+
.get(&height)
4147+
.expect("Did not find metadata at given height")
4148+
.certified_state_hash
4149+
.clone()
4150+
}
4151+
4152+
fn certifications_metadata_certification(&self, height: Height) -> Option<Certification> {
4153+
let states = self.states.read();
4154+
states
4155+
.certifications_metadata
4156+
.get(&height)
4157+
.expect("Did not find metadata at given height")
4158+
.certification
4159+
.clone()
4160+
}
4161+
4162+
fn latest_subnet_certified_height(&self) -> u64 {
4163+
self.latest_subnet_certified_height.load(Ordering::Relaxed)
4164+
}
40264165
}
40274166
}

0 commit comments

Comments
 (0)