Skip to content

Commit 970606e

Browse files
v2.1: Marks old storages as dirty and uncleaned in clean_accounts() (backport of #3737) (#3748)
* Marks old storages as dirty and uncleaned in clean_accounts() (#3737) (cherry picked from commit 31742ca) # Conflicts: # accounts-db/src/accounts_db/tests.rs * fixes merge conflict --------- Co-authored-by: Brooks <[email protected]>
1 parent 46a2595 commit 970606e

File tree

2 files changed

+142
-59
lines changed

2 files changed

+142
-59
lines changed

accounts-db/src/accounts_db.rs

Lines changed: 116 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2457,11 +2457,30 @@ impl AccountsDb {
24572457
is_startup: bool,
24582458
timings: &mut CleanKeyTimings,
24592459
epoch_schedule: &EpochSchedule,
2460+
old_storages_policy: OldStoragesPolicy,
24602461
) -> CleaningCandidates {
24612462
let oldest_non_ancient_slot = self.get_oldest_non_ancient_slot(epoch_schedule);
24622463
let mut dirty_store_processing_time = Measure::start("dirty_store_processing");
2463-
let max_slot_inclusive =
2464-
max_clean_root_inclusive.unwrap_or_else(|| self.accounts_index.max_root_inclusive());
2464+
let max_root_inclusive = self.accounts_index.max_root_inclusive();
2465+
let max_slot_inclusive = max_clean_root_inclusive.unwrap_or(max_root_inclusive);
2466+
2467+
if old_storages_policy == OldStoragesPolicy::Clean {
2468+
let slot_one_epoch_old =
2469+
max_root_inclusive.saturating_sub(epoch_schedule.slots_per_epoch);
2470+
// do nothing special for these 100 old storages that will likely get cleaned up shortly
2471+
let acceptable_straggler_slot_count = 100;
2472+
let old_slot_cutoff =
2473+
slot_one_epoch_old.saturating_sub(acceptable_straggler_slot_count);
2474+
let (old_storages, old_slots) = self.get_snapshot_storages(..old_slot_cutoff);
2475+
let num_old_storages = old_storages.len();
2476+
self.accounts_index
2477+
.add_uncleaned_roots(old_slots.iter().copied());
2478+
for (old_slot, old_storage) in std::iter::zip(old_slots, old_storages) {
2479+
self.dirty_stores.entry(old_slot).or_insert(old_storage);
2480+
}
2481+
info!("Marked {num_old_storages} old storages as dirty");
2482+
}
2483+
24652484
let mut dirty_stores = Vec::with_capacity(self.dirty_stores.len());
24662485
// find the oldest dirty slot
24672486
// we'll add logging if that append vec cannot be marked dead
@@ -2573,7 +2592,16 @@ impl AccountsDb {
25732592

25742593
/// Call clean_accounts() with the common parameters that tests/benches use.
25752594
pub fn clean_accounts_for_tests(&self) {
2576-
self.clean_accounts(None, false, &EpochSchedule::default())
2595+
self.clean_accounts(
2596+
None,
2597+
false,
2598+
&EpochSchedule::default(),
2599+
if self.ancient_append_vec_offset.is_some() {
2600+
OldStoragesPolicy::Leave
2601+
} else {
2602+
OldStoragesPolicy::Clean
2603+
},
2604+
)
25772605
}
25782606

25792607
/// called with cli argument to verify refcounts are correct on all accounts
@@ -2680,6 +2708,7 @@ impl AccountsDb {
26802708
max_clean_root_inclusive: Option<Slot>,
26812709
is_startup: bool,
26822710
epoch_schedule: &EpochSchedule,
2711+
old_storages_policy: OldStoragesPolicy,
26832712
) {
26842713
if self.exhaustively_verify_refcounts {
26852714
self.exhaustively_verify_refcounts(max_clean_root_inclusive);
@@ -2701,6 +2730,7 @@ impl AccountsDb {
27012730
is_startup,
27022731
&mut key_timings,
27032732
epoch_schedule,
2733+
old_storages_policy,
27042734
);
27052735

27062736
let num_candidates = Self::count_pubkeys(&candidates);
@@ -4561,7 +4591,15 @@ impl AccountsDb {
45614591
let maybe_clean = || {
45624592
if self.dirty_stores.len() > DIRTY_STORES_CLEANING_THRESHOLD {
45634593
let latest_full_snapshot_slot = self.latest_full_snapshot_slot();
4564-
self.clean_accounts(latest_full_snapshot_slot, is_startup, epoch_schedule);
4594+
self.clean_accounts(
4595+
latest_full_snapshot_slot,
4596+
is_startup,
4597+
epoch_schedule,
4598+
// Leave any old storages alone for now. Once the validator is running
4599+
// normal, calls to clean_accounts() will have the correct policy based
4600+
// on if ancient storages are enabled or not.
4601+
OldStoragesPolicy::Leave,
4602+
);
45654603
}
45664604
};
45674605

@@ -6738,40 +6776,6 @@ impl AccountsDb {
67386776
true
67396777
}
67406778

6741-
/// storages are sorted by slot and have range info.
6742-
/// add all stores older than slots_per_epoch to dirty_stores so clean visits these slots
6743-
fn mark_old_slots_as_dirty(
6744-
&self,
6745-
storages: &SortedStorages,
6746-
slots_per_epoch: Slot,
6747-
stats: &mut crate::accounts_hash::HashStats,
6748-
) {
6749-
// Nothing to do if ancient append vecs are enabled.
6750-
// Ancient slots will be visited by the ancient append vec code and dealt with correctly.
6751-
// we expect these ancient append vecs to be old and keeping accounts
6752-
// We can expect the normal processes will keep them cleaned.
6753-
// If we included them here then ALL accounts in ALL ancient append vecs will be visited by clean each time.
6754-
if self.ancient_append_vec_offset.is_some() {
6755-
return;
6756-
}
6757-
6758-
let mut mark_time = Measure::start("mark_time");
6759-
let mut num_dirty_slots: usize = 0;
6760-
let max = storages.max_slot_inclusive();
6761-
let acceptable_straggler_slot_count = 100; // do nothing special for these old stores which will likely get cleaned up shortly
6762-
let sub = slots_per_epoch + acceptable_straggler_slot_count;
6763-
let in_epoch_range_start = max.saturating_sub(sub);
6764-
for (slot, storage) in storages.iter_range(&(..in_epoch_range_start)) {
6765-
if let Some(storage) = storage {
6766-
self.dirty_stores.insert(slot, storage.clone());
6767-
num_dirty_slots += 1;
6768-
}
6769-
}
6770-
mark_time.stop();
6771-
stats.mark_time_us = mark_time.as_us();
6772-
stats.num_dirty_slots = num_dirty_slots;
6773-
}
6774-
67756779
pub fn calculate_accounts_hash_from(
67766780
&self,
67776781
data_source: CalcAccountsHashDataSource,
@@ -7112,8 +7116,6 @@ impl AccountsDb {
71127116
let storages_start_slot = storages.range().start;
71137117
stats.oldest_root = storages_start_slot;
71147118

7115-
self.mark_old_slots_as_dirty(storages, config.epoch_schedule.slots_per_epoch, &mut stats);
7116-
71177119
let slot = storages.max_slot_inclusive();
71187120
let use_bg_thread_pool = config.use_bg_thread_pool;
71197121
let accounts_hash_cache_path = self.accounts_hash_cache_path.clone();
@@ -9080,6 +9082,20 @@ pub(crate) enum UpdateIndexThreadSelection {
90809082
PoolWithThreshold,
90819083
}
90829084

9085+
/// How should old storages be handled in clean_accounts()?
9086+
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
9087+
pub enum OldStoragesPolicy {
9088+
/// Clean all old storages, even if they were not explictly marked as dirty.
9089+
///
9090+
/// This is the default behavior when not skipping rewrites.
9091+
Clean,
9092+
/// Leave all old storages.
9093+
///
9094+
/// When skipping rewrites, we intentionally will have ancient storages.
9095+
/// Do not clean them up automatically in clean_accounts().
9096+
Leave,
9097+
}
9098+
90839099
// These functions/fields are only usable from a dev context (i.e. tests and benches)
90849100
#[cfg(feature = "dev-context-only-utils")]
90859101
impl AccountStorageEntry {
@@ -11304,13 +11320,23 @@ pub mod tests {
1130411320
// updates in later slots in slot 1
1130511321
assert_eq!(accounts.alive_account_count_in_slot(0), 1);
1130611322
assert_eq!(accounts.alive_account_count_in_slot(1), 1);
11307-
accounts.clean_accounts(Some(0), false, &EpochSchedule::default());
11323+
accounts.clean_accounts(
11324+
Some(0),
11325+
false,
11326+
&EpochSchedule::default(),
11327+
OldStoragesPolicy::Leave,
11328+
);
1130811329
assert_eq!(accounts.alive_account_count_in_slot(0), 1);
1130911330
assert_eq!(accounts.alive_account_count_in_slot(1), 1);
1131011331
assert!(accounts.accounts_index.contains_with(&pubkey, None, None));
1131111332

1131211333
// Now the account can be cleaned up
11313-
accounts.clean_accounts(Some(1), false, &EpochSchedule::default());
11334+
accounts.clean_accounts(
11335+
Some(1),
11336+
false,
11337+
&EpochSchedule::default(),
11338+
OldStoragesPolicy::Leave,
11339+
);
1131411340
assert_eq!(accounts.alive_account_count_in_slot(0), 0);
1131511341
assert_eq!(accounts.alive_account_count_in_slot(1), 0);
1131611342

@@ -12842,7 +12868,12 @@ pub mod tests {
1284212868
db.add_root_and_flush_write_cache(1);
1284312869

1284412870
// Only clean zero lamport accounts up to slot 0
12845-
db.clean_accounts(Some(0), false, &EpochSchedule::default());
12871+
db.clean_accounts(
12872+
Some(0),
12873+
false,
12874+
&EpochSchedule::default(),
12875+
OldStoragesPolicy::Leave,
12876+
);
1284612877

1284712878
// Should still be able to find zero lamport account in slot 1
1284812879
assert_eq!(
@@ -13996,7 +14027,12 @@ pub mod tests {
1399614027
db.calculate_accounts_delta_hash(1);
1399714028

1399814029
// Clean to remove outdated entry from slot 0
13999-
db.clean_accounts(Some(1), false, &EpochSchedule::default());
14030+
db.clean_accounts(
14031+
Some(1),
14032+
false,
14033+
&EpochSchedule::default(),
14034+
OldStoragesPolicy::Leave,
14035+
);
1400014036

1400114037
// Shrink Slot 0
1400214038
{
@@ -14015,7 +14051,12 @@ pub mod tests {
1401514051
// Should be one store before clean for slot 0
1401614052
db.get_and_assert_single_storage(0);
1401714053
db.calculate_accounts_delta_hash(2);
14018-
db.clean_accounts(Some(2), false, &EpochSchedule::default());
14054+
db.clean_accounts(
14055+
Some(2),
14056+
false,
14057+
&EpochSchedule::default(),
14058+
OldStoragesPolicy::Leave,
14059+
);
1401914060

1402014061
// No stores should exist for slot 0 after clean
1402114062
assert_no_storages_at_slot(&db, 0);
@@ -14862,15 +14903,30 @@ pub mod tests {
1486214903
assert_eq!(accounts_db.ref_count_for_pubkey(&pubkey), 3);
1486314904

1486414905
accounts_db.set_latest_full_snapshot_slot(slot2);
14865-
accounts_db.clean_accounts(Some(slot2), false, &EpochSchedule::default());
14906+
accounts_db.clean_accounts(
14907+
Some(slot2),
14908+
false,
14909+
&EpochSchedule::default(),
14910+
OldStoragesPolicy::Leave,
14911+
);
1486614912
assert_eq!(accounts_db.ref_count_for_pubkey(&pubkey), 2);
1486714913

1486814914
accounts_db.set_latest_full_snapshot_slot(slot2);
14869-
accounts_db.clean_accounts(None, false, &EpochSchedule::default());
14915+
accounts_db.clean_accounts(
14916+
None,
14917+
false,
14918+
&EpochSchedule::default(),
14919+
OldStoragesPolicy::Leave,
14920+
);
1487014921
assert_eq!(accounts_db.ref_count_for_pubkey(&pubkey), 1);
1487114922

1487214923
accounts_db.set_latest_full_snapshot_slot(slot3);
14873-
accounts_db.clean_accounts(None, false, &EpochSchedule::default());
14924+
accounts_db.clean_accounts(
14925+
None,
14926+
false,
14927+
&EpochSchedule::default(),
14928+
OldStoragesPolicy::Leave,
14929+
);
1487414930
assert_eq!(accounts_db.ref_count_for_pubkey(&pubkey), 0);
1487514931
}
1487614932
);
@@ -17189,7 +17245,12 @@ pub mod tests {
1718917245

1719017246
// calculate the full accounts hash
1719117247
let full_accounts_hash = {
17192-
accounts_db.clean_accounts(Some(slot - 1), false, &EpochSchedule::default());
17248+
accounts_db.clean_accounts(
17249+
Some(slot - 1),
17250+
false,
17251+
&EpochSchedule::default(),
17252+
OldStoragesPolicy::Leave,
17253+
);
1719317254
let (storages, _) = accounts_db.get_snapshot_storages(..=slot);
1719417255
let storages = SortedStorages::new(&storages);
1719517256
accounts_db.calculate_accounts_hash(
@@ -17255,7 +17316,12 @@ pub mod tests {
1725517316
// calculate the incremental accounts hash
1725617317
let incremental_accounts_hash = {
1725717318
accounts_db.set_latest_full_snapshot_slot(full_accounts_hash_slot);
17258-
accounts_db.clean_accounts(Some(slot - 1), false, &EpochSchedule::default());
17319+
accounts_db.clean_accounts(
17320+
Some(slot - 1),
17321+
false,
17322+
&EpochSchedule::default(),
17323+
OldStoragesPolicy::Leave,
17324+
);
1725917325
let (storages, _) =
1726017326
accounts_db.get_snapshot_storages(full_accounts_hash_slot + 1..=slot);
1726117327
let storages = SortedStorages::new(&storages);

runtime/src/bank.rs

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ use {
7575
accounts::{AccountAddressFilter, Accounts, PubkeyAccountSlot},
7676
accounts_db::{
7777
AccountStorageEntry, AccountsDb, AccountsDbConfig, CalcAccountsHashDataSource,
78-
DuplicatesLtHash, PubkeyHashAccount, VerifyAccountsHashAndLamportsConfig,
78+
DuplicatesLtHash, OldStoragesPolicy, PubkeyHashAccount,
79+
VerifyAccountsHashAndLamportsConfig,
7980
},
8081
accounts_hash::{
8182
AccountHash, AccountsHash, AccountsLtHash, CalcAccountsHashConfig, HashStats,
@@ -6114,14 +6115,15 @@ impl Bank {
61146115
let should_clean = force_clean || (!skip_shrink && self.slot() > 0);
61156116
if should_clean {
61166117
info!("Cleaning...");
6117-
// We cannot clean past the last full snapshot's slot because we are about to
6118+
// We cannot clean past the latest full snapshot's slot because we are about to
61186119
// perform an accounts hash calculation *up to that slot*. If we cleaned *past*
61196120
// that slot, then accounts could be removed from older storages, which would
61206121
// change the accounts hash.
61216122
self.rc.accounts.accounts_db.clean_accounts(
61226123
Some(latest_full_snapshot_slot),
61236124
true,
61246125
self.epoch_schedule(),
6126+
self.clean_accounts_old_storages_policy(),
61256127
);
61266128
info!("Cleaning... Done.");
61276129
} else {
@@ -6457,6 +6459,7 @@ impl Bank {
64576459
Some(highest_slot_to_clean),
64586460
false,
64596461
self.epoch_schedule(),
6462+
self.clean_accounts_old_storages_policy(),
64606463
);
64616464
}
64626465

@@ -6472,23 +6475,37 @@ impl Bank {
64726475
}
64736476

64746477
pub(crate) fn shrink_ancient_slots(&self) {
6475-
let can_skip_rewrites = self.bank_hash_skips_rent_rewrites();
6476-
let test_skip_rewrites_but_include_in_bank_hash = self
6477-
.rc
6478-
.accounts
6479-
.accounts_db
6480-
.test_skip_rewrites_but_include_in_bank_hash;
64816478
// Invoke ancient slot shrinking only when the validator is
64826479
// explicitly configured to do so. This condition may be
64836480
// removed when the skip rewrites feature is enabled.
6484-
if can_skip_rewrites || test_skip_rewrites_but_include_in_bank_hash {
6481+
if self.are_ancient_storages_enabled() {
64856482
self.rc
64866483
.accounts
64876484
.accounts_db
64886485
.shrink_ancient_slots(self.epoch_schedule())
64896486
}
64906487
}
64916488

6489+
/// Returns if ancient storages are enabled or not
6490+
pub fn are_ancient_storages_enabled(&self) -> bool {
6491+
let can_skip_rewrites = self.bank_hash_skips_rent_rewrites();
6492+
let test_skip_rewrites_but_include_in_bank_hash = self
6493+
.rc
6494+
.accounts
6495+
.accounts_db
6496+
.test_skip_rewrites_but_include_in_bank_hash;
6497+
can_skip_rewrites || test_skip_rewrites_but_include_in_bank_hash
6498+
}
6499+
6500+
/// Returns how clean_accounts() should handle old storages
6501+
fn clean_accounts_old_storages_policy(&self) -> OldStoragesPolicy {
6502+
if self.are_ancient_storages_enabled() {
6503+
OldStoragesPolicy::Leave
6504+
} else {
6505+
OldStoragesPolicy::Clean
6506+
}
6507+
}
6508+
64926509
pub fn read_cost_tracker(&self) -> LockResult<RwLockReadGuard<CostTracker>> {
64936510
self.cost_tracker.read()
64946511
}

0 commit comments

Comments
 (0)