
Commit dd54f65

Authored by paritytech-release-backport-bot[bot], bkchr, and github-actions[bot]
[stable2512] Backport #10495 (#10532)
Backport #10495 into `stable2512` from bkchr. See the [documentation](https://github.com/paritytech/polkadot-sdk/blob/master/docs/BACKPORT.md) on how to use this bot.

Co-authored-by: Bastian Köcher <[email protected]>
Co-authored-by: cmd[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent bbed2d3 commit dd54f65

9 files changed: 144 additions, 69 deletions


Cargo.lock

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -895,7 +895,7 @@ k256 = { version = "0.13.4", default-features = false }
 kitchensink-runtime = { path = "substrate/bin/node/runtime" }
 kvdb = { version = "0.13.0" }
 kvdb-memorydb = { version = "0.13.0" }
-kvdb-rocksdb = { version = "0.20.1" }
+kvdb-rocksdb = { version = "0.21.0" }
 kvdb-shared-tests = { version = "0.11.0" }
 landlock = { version = "0.3.0" }
 libc = { version = "0.2.155" }

prdoc/pr_10495.prdoc

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+title: 'Rocksdb: Force compact columns on after warp sync'
+doc:
+- audience: Node Operator
+  description: |-
+    Recently we introduced a change that was always force compacting a Rocksdb database when starting a node and after writing a lot of data. We found out that force compacting a huge RocksDB of more than 600GB takes quite some time (more than one hour) and this every time.
+
+    So, this pull request changes the compaction to only happen after warp sync (and genesis) when we reset the state column to some given state. This way we don't run it anymore on startup of the node and it should fix the problems we have seen with archive nodes.
+crates:
+- name: sc-client-db
+  bump: patch
+  validate: false
+- name: sp-database
+  bump: patch
+  validate: false
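Before the per-file diffs, a minimal, self-contained Rust sketch of the pattern this backport introduces. The trait, type, and function names below are simplified placeholders rather than the real `sp-database`/`sc-client-db` items: a default no-op `optimize_db_col` hook on the database trait, overridden only by the RocksDB-backed implementation, and invoked by the commit path only when the whole state column was replaced (warp sync or genesis), not on every node start.

```rust
// Simplified sketch of the hook pattern; names are illustrative placeholders.
type ColumnId = u32;

trait Database {
	/// Optimize a database column. Defaults to a no-op so that backends
	/// without a compaction primitive (e.g. in-memory) need no changes.
	fn optimize_db_col(&self, _col: ColumnId) -> Result<(), String> {
		Ok(())
	}
}

struct RocksDbBackend;

impl Database for RocksDbBackend {
	fn optimize_db_col(&self, col: ColumnId) -> Result<(), String> {
		// The real adapter maps this to `kvdb_rocksdb::Database::force_compact`.
		println!("force-compacting column {col}");
		Ok(())
	}
}

fn commit_operation(db: &dyn Database, reset_storage: bool, state_col: ColumnId) {
	// ... write the transaction ...
	if reset_storage {
		// Only compact when the entire state was replaced; failures are logged,
		// not propagated.
		if let Err(e) = db.optimize_db_col(state_col) {
			eprintln!("Failed to optimize database after state import: {e:?}");
		}
	}
}

fn main() {
	commit_operation(&RocksDbBackend, true, 1);
}
```

The real wiring, shown in the diffs below, sets the flag from `reset_storage` in `sc-client-db` and routes the override to `kvdb_rocksdb::Database::force_compact` in `sp-database`.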

substrate/client/db/Cargo.toml

Lines changed: 2 additions & 2 deletions
@@ -27,7 +27,7 @@ codec = { features = ["derive"], workspace = true, default-features = true }
 hash-db = { workspace = true, default-features = true }
 kvdb = { workspace = true }
 kvdb-memorydb = { workspace = true }
-kvdb-rocksdb = { optional = true, workspace = true }
+kvdb-rocksdb = { optional = true, workspace = true, features = ["jemalloc"] }
 linked-hash-map = { workspace = true }
 log = { workspace = true, default-features = true }
 parity-db = { workspace = true }
@@ -62,4 +62,4 @@ runtime-benchmarks = [
 	"kitchensink-runtime/runtime-benchmarks",
 	"sp-runtime/runtime-benchmarks",
 ]
-rocksdb = ["kvdb-rocksdb"]
+rocksdb = ["kvdb-rocksdb", "sp-database/rocksdb"]

substrate/client/db/src/lib.rs

Lines changed: 11 additions & 0 deletions
@@ -839,6 +839,7 @@ pub struct BlockImportOperation<Block: BlockT> {
 	set_head: Option<Block::Hash>,
 	commit_state: bool,
 	create_gap: bool,
+	reset_storage: bool,
 	index_ops: Vec<IndexOperation>,
 }
 
@@ -934,6 +935,7 @@ impl<Block: BlockT> sc_client_api::backend::BlockImportOperation<Block>
 	) -> ClientResult<Block::Hash> {
 		let root = self.apply_new_state(storage, state_version)?;
 		self.commit_state = true;
+		self.reset_storage = true;
 		Ok(root)
 	}
 
@@ -1841,6 +1843,14 @@ impl<Block: BlockT> Backend<Block> {
 
 		self.storage.db.commit(transaction)?;
 
+		// `reset_storage == true` means the entire state got replaced.
+		// In this case we optimize the `STATE` column to improve read performance.
+		if operation.reset_storage {
+			if let Err(e) = self.storage.db.optimize_db_col(columns::STATE) {
+				warn!(target: "db", "Failed to optimize database after state import: {e:?}");
+			}
+		}
+
 		// Apply all in-memory state changes.
 		// Code beyond this point can't fail.
 
@@ -2152,6 +2162,7 @@ impl<Block: BlockT> sc_client_api::backend::Backend<Block> for Backend<Block> {
 			set_head: None,
 			commit_state: false,
 			create_gap: true,
+			reset_storage: false,
 			index_ops: Default::default(),
 		})
 	}

substrate/client/db/src/utils.rs

Lines changed: 1 addition & 1 deletion
@@ -349,7 +349,7 @@ fn open_kvdb_rocksdb<Block: BlockT>(
 	let db = kvdb_rocksdb::Database::open(&db_config, path)?;
 	// write database version only after the database is successfully opened
 	crate::upgrade::update_version(path)?;
-	Ok(sp_database::as_database(db))
+	Ok(sp_database::as_rocksdb_database(db))
 }
 
 #[cfg(not(any(feature = "rocksdb", test)))]

substrate/primitives/database/Cargo.toml

Lines changed: 5 additions & 0 deletions
@@ -15,4 +15,9 @@ workspace = true
 
 [dependencies]
 kvdb = { workspace = true }
+kvdb-rocksdb = { optional = true, workspace = true }
 parking_lot = { workspace = true, default-features = true }
+
+[features]
+default = []
+rocksdb = ["kvdb-rocksdb"]

substrate/primitives/database/src/kvdb.rs

Lines changed: 100 additions & 63 deletions
@@ -31,7 +31,75 @@ fn handle_err<T>(result: std::io::Result<T>) -> T {
 	}
 }
 
-/// Wrap RocksDb database into a trait object that implements `sp_database::Database`
+/// Read the reference counter for a key.
+fn read_counter(
+	db: &dyn KeyValueDB,
+	col: ColumnId,
+	key: &[u8],
+) -> error::Result<(Vec<u8>, Option<u32>)> {
+	let mut counter_key = key.to_vec();
+	counter_key.push(0);
+	Ok(match db.get(col, &counter_key).map_err(|e| error::DatabaseError(Box::new(e)))? {
+		Some(data) => {
+			let mut counter_data = [0; 4];
+			if data.len() != 4 {
+				return Err(error::DatabaseError(Box::new(std::io::Error::new(
+					std::io::ErrorKind::Other,
+					format!("Unexpected counter len {}", data.len()),
+				))))
+			}
+			counter_data.copy_from_slice(&data);
+			let counter = u32::from_le_bytes(counter_data);
+			(counter_key, Some(counter))
+		},
+		None => (counter_key, None),
+	})
+}
+
+/// Commit a transaction to a KeyValueDB.
+fn commit_impl<H: Clone + AsRef<[u8]>>(
+	db: &dyn KeyValueDB,
+	transaction: Transaction<H>,
+) -> error::Result<()> {
+	let mut tx = DBTransaction::new();
+	for change in transaction.0.into_iter() {
+		match change {
+			Change::Set(col, key, value) => tx.put_vec(col, &key, value),
+			Change::Remove(col, key) => tx.delete(col, &key),
+			Change::Store(col, key, value) => match read_counter(db, col, key.as_ref())? {
+				(counter_key, Some(mut counter)) => {
+					counter += 1;
+					tx.put(col, &counter_key, &counter.to_le_bytes());
+				},
+				(counter_key, None) => {
+					let d = 1u32.to_le_bytes();
+					tx.put(col, &counter_key, &d);
+					tx.put_vec(col, key.as_ref(), value);
+				},
+			},
+			Change::Reference(col, key) => {
+				if let (counter_key, Some(mut counter)) = read_counter(db, col, key.as_ref())? {
+					counter += 1;
+					tx.put(col, &counter_key, &counter.to_le_bytes());
+				}
+			},
+			Change::Release(col, key) => {
+				if let (counter_key, Some(mut counter)) = read_counter(db, col, key.as_ref())? {
+					counter -= 1;
+					if counter == 0 {
+						tx.delete(col, &counter_key);
+						tx.delete(col, key.as_ref());
+					} else {
+						tx.put(col, &counter_key, &counter.to_le_bytes());
+					}
+				}
+			},
+		}
+	}
+	db.write(tx).map_err(|e| error::DatabaseError(Box::new(e)))
+}
+
+/// Wrap generic kvdb-based database into a trait object that implements [`Database`].
 pub fn as_database<D, H>(db: D) -> std::sync::Arc<dyn Database<H>>
 where
 	D: KeyValueDB + 'static,
@@ -40,72 +108,28 @@ where
 	std::sync::Arc::new(DbAdapter(db))
 }
 
-impl<D: KeyValueDB> DbAdapter<D> {
-	// Returns counter key and counter value if it exists.
-	fn read_counter(&self, col: ColumnId, key: &[u8]) -> error::Result<(Vec<u8>, Option<u32>)> {
-		// Add a key suffix for the counter
-		let mut counter_key = key.to_vec();
-		counter_key.push(0);
-		Ok(match self.0.get(col, &counter_key).map_err(|e| error::DatabaseError(Box::new(e)))? {
-			Some(data) => {
-				let mut counter_data = [0; 4];
-				if data.len() != 4 {
-					return Err(error::DatabaseError(Box::new(std::io::Error::new(
-						std::io::ErrorKind::Other,
-						format!("Unexpected counter len {}", data.len()),
-					))))
-				}
-				counter_data.copy_from_slice(&data);
-				let counter = u32::from_le_bytes(counter_data);
-				(counter_key, Some(counter))
-			},
-			None => (counter_key, None),
-		})
+impl<D: KeyValueDB, H: Clone + AsRef<[u8]>> Database<H> for DbAdapter<D> {
+	fn commit(&self, transaction: Transaction<H>) -> error::Result<()> {
+		commit_impl(&self.0, transaction)
+	}
+
+	fn get(&self, col: ColumnId, key: &[u8]) -> Option<Vec<u8>> {
+		handle_err(self.0.get(col, key))
+	}
+
+	fn contains(&self, col: ColumnId, key: &[u8]) -> bool {
+		handle_err(self.0.has_key(col, key))
 	}
 }
 
-impl<D: KeyValueDB, H: Clone + AsRef<[u8]>> Database<H> for DbAdapter<D> {
+/// RocksDB-specific adapter that implements `optimize_db` via `force_compact`.
+#[cfg(feature = "rocksdb")]
+pub struct RocksDbAdapter(kvdb_rocksdb::Database);
+
+#[cfg(feature = "rocksdb")]
+impl<H: Clone + AsRef<[u8]>> Database<H> for RocksDbAdapter {
 	fn commit(&self, transaction: Transaction<H>) -> error::Result<()> {
-		let mut tx = DBTransaction::new();
-		for change in transaction.0.into_iter() {
-			match change {
-				Change::Set(col, key, value) => tx.put_vec(col, &key, value),
-				Change::Remove(col, key) => tx.delete(col, &key),
-				Change::Store(col, key, value) => match self.read_counter(col, key.as_ref())? {
-					(counter_key, Some(mut counter)) => {
-						counter += 1;
-						tx.put(col, &counter_key, &counter.to_le_bytes());
-					},
-					(counter_key, None) => {
-						let d = 1u32.to_le_bytes();
-						tx.put(col, &counter_key, &d);
-						tx.put_vec(col, key.as_ref(), value);
-					},
-				},
-				Change::Reference(col, key) => {
-					if let (counter_key, Some(mut counter)) =
-						self.read_counter(col, key.as_ref())?
-					{
-						counter += 1;
-						tx.put(col, &counter_key, &counter.to_le_bytes());
-					}
-				},
-				Change::Release(col, key) => {
-					if let (counter_key, Some(mut counter)) =
-						self.read_counter(col, key.as_ref())?
-					{
-						counter -= 1;
-						if counter == 0 {
-							tx.delete(col, &counter_key);
-							tx.delete(col, key.as_ref());
-						} else {
-							tx.put(col, &counter_key, &counter.to_le_bytes());
-						}
-					}
-				},
-			}
-		}
-		self.0.write(tx).map_err(|e| error::DatabaseError(Box::new(e)))
+		commit_impl(&self.0, transaction)
 	}
 
 	fn get(&self, col: ColumnId, key: &[u8]) -> Option<Vec<u8>> {
@@ -115,4 +139,17 @@ impl<D: KeyValueDB, H: Clone + AsRef<[u8]>> Database<H> for DbAdapter<D> {
 	fn contains(&self, col: ColumnId, key: &[u8]) -> bool {
 		handle_err(self.0.has_key(col, key))
 	}
+
+	fn optimize_db_col(&self, col: ColumnId) -> error::Result<()> {
+		self.0.force_compact(col).map_err(|e| error::DatabaseError(Box::new(e)))
+	}
+}
+
+/// Wrap RocksDB database into a trait object with `optimize_db` support.
+#[cfg(feature = "rocksdb")]
+pub fn as_rocksdb_database<H>(db: kvdb_rocksdb::Database) -> std::sync::Arc<dyn Database<H>>
+where
+	H: Clone + AsRef<[u8]>,
+{
+	std::sync::Arc::new(RocksDbAdapter(db))
 }
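For orientation, a hedged usage sketch of the new constructor: it assumes `sp-database` is built with its `rocksdb` feature, uses an illustrative column count and column index (the real state column index lives in `sc-client-db`), and panics on open errors to keep the example short.

```rust
use std::sync::Arc;

use kvdb_rocksdb::{Database as RocksDb, DatabaseConfig};
use sp_database::{as_rocksdb_database, Database};

fn open_and_compact(path: &str) -> sp_database::error::Result<()> {
	// Open a RocksDB database with an illustrative column count.
	let config = DatabaseConfig::with_columns(12);
	let rocks = RocksDb::open(&config, path).expect("database opens");

	// `as_rocksdb_database` wraps the handle in the RocksDB-specific adapter,
	// so `optimize_db_col` maps to a forced compaction rather than the
	// default no-op from the `Database` trait.
	let db: Arc<dyn Database<Vec<u8>>> = as_rocksdb_database(rocks);
	db.optimize_db_col(1)
}
```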

substrate/primitives/database/src/lib.rs

Lines changed: 7 additions & 0 deletions
@@ -22,6 +22,8 @@ mod kvdb;
 mod mem;
 
 pub use crate::kvdb::as_database;
+#[cfg(feature = "rocksdb")]
+pub use crate::kvdb::as_rocksdb_database;
 pub use mem::MemDb;
 
 /// An identifier for a column.
@@ -117,6 +119,11 @@ pub trait Database<H: Clone + AsRef<[u8]>>: Send + Sync {
 	///
 	/// Not all database implementations use a prefix for keys, so this function may be a noop.
 	fn sanitize_key(&self, _key: &mut Vec<u8>) {}
+
+	/// Optimize a database column.
+	fn optimize_db_col(&self, _col: ColumnId) -> error::Result<()> {
+		Ok(())
+	}
 }
 
 impl<H> std::fmt::Debug for dyn Database<H> {
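Because `optimize_db_col` ships with a default body, existing `Database` implementors need no changes. A small sketch, assuming `MemDb` derives `Default`, that exercises the default no-op:

```rust
use sp_database::{ColumnId, Database, MemDb};

fn main() {
	// MemDb does not override the new hook, so the default no-op applies:
	// existing `Database` implementations keep compiling and behaving as before.
	let db = MemDb::default();
	let col: ColumnId = 0;
	assert!(Database::<Vec<u8>>::optimize_db_col(&db, col).is_ok());
}
```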
