From 18067998ac4f57d5ce9c050cb870a331ea198a40 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com> Date: Wed, 2 Jul 2025 06:08:31 +0000 Subject: [PATCH 1/7] feat(db): include database version in `DbEnv` - Add private `version` field to `DbEnvInner` struct - Modify `DbEnv::open()` to accept version parameter - Update `open_ephemeral()` to use `CURRENT_DB_VERSION` - Add public `version()` getter method to access stored version - Update `open_db()` and `init_db()` to read and pass version from file - Update CLI database opening functions to handle version parameter The database version is now accessible at the provider level through `DbEnv::version()` method as requested in issue #154. Co-authored-by: Ammar Arif --- bin/katana/src/cli/db/mod.rs | 11 +++++++++-- crates/storage/db/src/lib.rs | 15 ++++++++++----- crates/storage/db/src/mdbx/mod.rs | 14 +++++++++++--- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/bin/katana/src/cli/db/mod.rs b/bin/katana/src/cli/db/mod.rs index 11ac0e430..df5c447ac 100644 --- a/bin/katana/src/cli/db/mod.rs +++ b/bin/katana/src/cli/db/mod.rs @@ -6,6 +6,7 @@ use comfy_table::modifiers::UTF8_ROUND_CORNERS; use comfy_table::presets::UTF8_FULL; use comfy_table::Table; use katana_db::mdbx::{DbEnv, DbEnvKind}; +use katana_db::version::get_db_version; mod prune; mod stats; @@ -45,14 +46,20 @@ impl DbArgs { /// error messages. pub fn open_db_ro(path: &str) -> Result { let path = path::absolute(shellexpand::full(path)?.into_owned())?; - DbEnv::open(&path, DbEnvKind::RO).with_context(|| { + let version = get_db_version(&path).with_context(|| { + format!("Reading database version from path {}", path.display()) + })?; + DbEnv::open(&path, DbEnvKind::RO, version).with_context(|| { format!("Opening database file in read-only mode at path {}", path.display()) }) } pub fn open_db_rw(path: &str) -> Result { let path = path::absolute(shellexpand::full(path)?.into_owned())?; - katana_db::open_db(path) + let version = get_db_version(&path).with_context(|| { + format!("Reading database version from path {}", path.display()) + })?; + katana_db::open_db(path, version) } /// Create a table with the default UTF-8 full border and rounded corners. diff --git a/crates/storage/db/src/lib.rs b/crates/storage/db/src/lib.rs index b4a478f11..ce1d13ecd 100644 --- a/crates/storage/db/src/lib.rs +++ b/crates/storage/db/src/lib.rs @@ -21,8 +21,8 @@ pub mod version; use mdbx::{DbEnv, DbEnvKind}; use utils::is_database_empty; use version::{ - check_db_version, create_db_version_file, is_block_compatible_version, DatabaseVersionError, - CURRENT_DB_VERSION, + check_db_version, create_db_version_file, get_db_version, is_block_compatible_version, + DatabaseVersionError, CURRENT_DB_VERSION, }; /// Initialize the database at the given path and returning a handle to the its @@ -62,7 +62,12 @@ pub fn init_db>(path: P) -> anyhow::Result { } } - let env = open_db(path)?; + // After ensuring the version file exists, get the actual version from the file + let version = get_db_version(&path).with_context(|| { + format!("Reading database version from path {}", path.as_ref().display()) + })?; + + let env = open_db(path, version)?; env.create_tables()?; Ok(env) } @@ -85,8 +90,8 @@ pub fn init_ephemeral_db() -> anyhow::Result { } /// Open the database at the given `path` in read-write mode. -pub fn open_db>(path: P) -> anyhow::Result { - DbEnv::open(path.as_ref(), DbEnvKind::RW).with_context(|| { +pub fn open_db>(path: P, version: u32) -> anyhow::Result { + DbEnv::open(path.as_ref(), DbEnvKind::RW, version).with_context(|| { format!("Opening database in read-write mode at path {}", path.as_ref().display()) }) } diff --git a/crates/storage/db/src/mdbx/mod.rs b/crates/storage/db/src/mdbx/mod.rs index 81d94992a..bbd550d75 100644 --- a/crates/storage/db/src/mdbx/mod.rs +++ b/crates/storage/db/src/mdbx/mod.rs @@ -22,6 +22,7 @@ use crate::abstraction::Database; use crate::error::DatabaseError; use crate::tables::{TableType, Tables, NUM_TABLES}; use crate::utils; +use crate::version::CURRENT_DB_VERSION; const GIGABYTE: usize = 1024 * 1024 * 1024; const TERABYTE: usize = GIGABYTE * 1024; @@ -53,13 +54,15 @@ struct DbEnvInner { /// A flag inidicating whether the database is ephemeral or not. If `true`, the database will /// be deleted when the environment is dropped. ephemeral: bool, + /// The database schema version. + version: u32, } impl DbEnv { /// Opens the database at the specified path with the given `EnvKind`. /// /// It does not create the tables, for that call [`DbEnv::create_tables`]. - pub fn open(path: impl AsRef, kind: DbEnvKind) -> Result { + pub fn open(path: impl AsRef, kind: DbEnvKind, version: u32) -> Result { let mode = match kind { DbEnvKind::RO => Mode::ReadOnly, DbEnvKind::RW => Mode::ReadWrite { sync_mode: SyncMode::Durable }, @@ -89,7 +92,7 @@ impl DbEnv { let env = builder.open(path.as_ref()).map_err(DatabaseError::OpenEnv)?; let dir = path.as_ref().to_path_buf(); - let inner = DbEnvInner { env, dir, ephemeral: false }; + let inner = DbEnvInner { env, dir, ephemeral: false, version }; Ok(Self { inner: Arc::new(inner) }.with_metrics()) } @@ -121,7 +124,7 @@ impl DbEnv { let env = builder.open(path).map_err(DatabaseError::OpenEnv)?; let dir = path.to_path_buf(); - let inner = DbEnvInner { env, dir, ephemeral: true }; + let inner = DbEnvInner { env, dir, ephemeral: true, version: CURRENT_DB_VERSION }; Ok(Self { inner: Arc::new(inner) }.with_metrics()) } @@ -149,6 +152,11 @@ impl DbEnv { &self.inner.dir } + /// Returns the database schema version. + pub fn version(&self) -> u32 { + self.inner.version + } + fn with_metrics(self) -> Self { describe_gauge!("db.table_size", metrics::Unit::Bytes, "Total size of the table"); describe_gauge!("db.table_pages", metrics::Unit::Count, "Number of pages in the table"); From 8ad8fd6ac1bccc2718f71400bce7188cdcd4170b Mon Sep 17 00:00:00 2001 From: Ammar Arif Date: Wed, 2 Jul 2025 18:42:04 +0800 Subject: [PATCH 2/7] wip --- crates/storage/db/src/lib.rs | 152 ++++++++++++++------------ crates/storage/db/src/mdbx/builder.rs | 37 +++++++ crates/storage/db/src/mdbx/mod.rs | 114 +++++++++++++++---- crates/storage/db/src/version.rs | 25 ++++- 4 files changed, 228 insertions(+), 100 deletions(-) create mode 100644 crates/storage/db/src/mdbx/builder.rs diff --git a/crates/storage/db/src/lib.rs b/crates/storage/db/src/lib.rs index ce1d13ecd..fb3be1f77 100644 --- a/crates/storage/db/src/lib.rs +++ b/crates/storage/db/src/lib.rs @@ -25,75 +25,85 @@ use version::{ DatabaseVersionError, CURRENT_DB_VERSION, }; -/// Initialize the database at the given path and returning a handle to the its -/// environment. -/// -/// This will create the default tables, if necessary. -pub fn init_db>(path: P) -> anyhow::Result { - if is_database_empty(path.as_ref()) { - fs::create_dir_all(&path).with_context(|| { - format!("Creating database directory at path {}", path.as_ref().display()) - })?; - create_db_version_file(&path, CURRENT_DB_VERSION).with_context(|| { - format!("Inserting database version file at path {}", path.as_ref().display()) - })? - } else { - match check_db_version(&path) { - Ok(_) => {} - Err(DatabaseVersionError::MismatchVersion { found, .. }) => { - if is_block_compatible_version(found) { - println!("Using database version {} with block compatibility mode", found); - } else { - return Err(anyhow!(DatabaseVersionError::MismatchVersion { - expected: CURRENT_DB_VERSION, - found - })); +#[derive(Debug, Clone)] +pub struct Db { + env: DbEnv, + version: version::Version, +} + +impl Db { + /// Initialize the database at the given path and returning a handle to the its + /// environment. + /// + /// This will create the default tables, if necessary. + pub fn new>(path: P) -> anyhow::Result { + if is_database_empty(path.as_ref()) { + fs::create_dir_all(&path).with_context(|| { + format!("Creating database directory at path {}", path.as_ref().display()) + })?; + create_db_version_file(&path, CURRENT_DB_VERSION.inner()).with_context(|| { + format!("Inserting database version file at path {}", path.as_ref().display()) + })? + } else { + match check_db_version(&path) { + Ok(_) => {} + Err(DatabaseVersionError::MismatchVersion { found, .. }) => { + if is_block_compatible_version(found) { + println!("Using database version {} with block compatibility mode", found); + } else { + return Err(anyhow!(DatabaseVersionError::MismatchVersion { + expected: CURRENT_DB_VERSION.inner(), + found + })); + } } + Err(DatabaseVersionError::FileNotFound) => { + create_db_version_file(&path, CURRENT_DB_VERSION.inner()).with_context(|| { + format!( + "No database version file found. Inserting version file at path {}", + path.as_ref().display() + ) + })? + } + Err(err) => return Err(anyhow!(err)), } - Err(DatabaseVersionError::FileNotFound) => { - create_db_version_file(&path, CURRENT_DB_VERSION).with_context(|| { - format!( - "No database version file found. Inserting version file at path {}", - path.as_ref().display() - ) - })? - } - Err(err) => return Err(anyhow!(err)), } - } - // After ensuring the version file exists, get the actual version from the file - let version = get_db_version(&path).with_context(|| { - format!("Reading database version from path {}", path.as_ref().display()) - })?; + // After ensuring the version file exists, get the actual version from the file + let version = get_db_version(&path).with_context(|| { + format!("Reading database version from path {}", path.as_ref().display()) + })?; - let env = open_db(path, version)?; - env.create_tables()?; - Ok(env) -} + let env = mdbx::DbEnv::open(path, mdbx::DbEnvKind::RW)?; + env.create_tables()?; + Ok(Self { env, version }) + } -/// Similar to [`init_db`] but will initialize a temporary database. -/// -/// Though it is useful for testing per se, but the initial motivation to implement this -/// variation of database is to be used as the backend for the in-memory storage -/// provider. Mainly to avoid having two separate implementations for the in-memory and -/// persistent db. Simplifying it to using a single solid implementation. -/// -/// As such, this database environment will trade off durability for write performance and shouldn't -/// be used in the case where data persistence is required. For that, use [`init_db`]. -pub fn init_ephemeral_db() -> anyhow::Result { - // Because the underlying database will always be removed, so there's no need to include the - // version file. - let env = DbEnv::open_ephemeral().context("Opening ephemeral database")?; - env.create_tables()?; - Ok(env) -} + /// Similar to [`init_db`] but will initialize a temporary database. + /// + /// Though it is useful for testing per se, but the initial motivation to implement this + /// variation of database is to be used as the backend for the in-memory storage + /// provider. Mainly to avoid having two separate implementations for the in-memory and + /// persistent db. Simplifying it to using a single solid implementation. + /// + /// As such, this database environment will trade off durability for write performance and shouldn't + /// be used in the case where data persistence is required. For that, use [`init_db`]. + pub fn in_memory() -> anyhow::Result { + let env = DbEnv::open_ephemeral().context("Opening ephemeral database")?; + env.create_tables()?; + Ok(Self { env, version: CURRENT_DB_VERSION }) + } -/// Open the database at the given `path` in read-write mode. -pub fn open_db>(path: P, version: u32) -> anyhow::Result { - DbEnv::open(path.as_ref(), DbEnvKind::RW, version).with_context(|| { - format!("Opening database in read-write mode at path {}", path.as_ref().display()) - }) + /// Open the database at the given `path` in read-write mode. + pub fn open>(path: P) -> anyhow::Result { + let env = DbEnv::open(path.as_ref(), DbEnvKind::RW).with_context(|| { + format!("Opening database in read-write mode at path {}", path.as_ref().display()) + })?; + let version = get_db_version(path.as_ref()).with_context(|| { + format!("Getting database version at path {}", path.as_ref().display()) + })?; + Ok(Self { env, version }) + } } #[cfg(test)] @@ -102,12 +112,12 @@ mod tests { use std::fs; use crate::version::{default_version_file_path, get_db_version, CURRENT_DB_VERSION}; - use crate::{init_db, init_ephemeral_db}; + use crate::{Db}; #[test] fn initialize_db_in_empty_dir() { let path = tempfile::tempdir().unwrap(); - init_db(path.path()).unwrap(); + Db::new(path.path()).unwrap(); let version_file = fs::File::open(default_version_file_path(path.path())).unwrap(); let actual_version = get_db_version(path.path()).unwrap(); @@ -123,10 +133,10 @@ mod tests { fn initialize_db_in_existing_db_dir() { let path = tempfile::tempdir().unwrap(); - init_db(path.path()).unwrap(); + Db::new(path.path()).unwrap(); let version = get_db_version(path.path()).unwrap(); - init_db(path.path()).unwrap(); + Db::new(path.path()).unwrap(); let same_version = get_db_version(path.path()).unwrap(); assert_eq!(version, same_version); @@ -138,7 +148,7 @@ mod tests { let version_file_path = default_version_file_path(path.path()); fs::write(version_file_path, b"malformed").unwrap(); - let err = init_db(path.path()).unwrap_err(); + let err = Db::new(path.path()).unwrap_err(); assert!(err.to_string().contains("Malformed database version file")); } @@ -148,18 +158,18 @@ mod tests { let version_file_path = default_version_file_path(path.path()); fs::write(version_file_path, 99u32.to_be_bytes()).unwrap(); - let err = init_db(path.path()).unwrap_err(); + let err = Db::new(path.path()).unwrap_err(); assert!(err.to_string().contains("Database version mismatch")); } #[test] fn initialize_db_with_missing_version_file() { let path = tempfile::tempdir().unwrap(); - init_db(path.path()).unwrap(); + Db::new(path.path()).unwrap(); fs::remove_file(default_version_file_path(path.path())).unwrap(); - init_db(path.path()).unwrap(); + Db::new(path.path()).unwrap(); let actual_version = get_db_version(path.path()).unwrap(); assert_eq!(actual_version, CURRENT_DB_VERSION); } @@ -167,7 +177,7 @@ mod tests { #[test] fn ephemeral_db_deletion_on_drop() { // Create an ephemeral database - let db = init_ephemeral_db().expect("failed to create ephemeral database"); + let db = Db::in_memory().expect("failed to create ephemeral database"); let dir_path = db.path().to_path_buf(); // Ensure the directory exists diff --git a/crates/storage/db/src/mdbx/builder.rs b/crates/storage/db/src/mdbx/builder.rs new file mode 100644 index 000000000..98c404b9d --- /dev/null +++ b/crates/storage/db/src/mdbx/builder.rs @@ -0,0 +1,37 @@ +//! Builder for [`DbEnv`]. + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn builder_default() { + let builder = DbEnvBuilder::default(); + assert!(matches!(builder.kind, DbEnvKind::RW)); + assert!(!builder.ephemeral); + assert_eq!(builder.version.inner(), CURRENT_DB_VERSION.inner()); + } + + #[test] + fn builder_ephemeral() { + let db = DbEnvBuilder::new(DbEnvKind::RW) + .ephemeral() + .build_ephemeral() + .expect("Failed to create ephemeral database"); + + assert!(db.path().exists()); + } + + #[test] + fn builder_custom_settings() { + let builder = DbEnvBuilder::new(DbEnvKind::RO) + .max_readers(1000) + .max_dbs(50) + .with_version(Version::new(42)); + + assert!(matches!(builder.kind, DbEnvKind::RO)); + assert_eq!(builder.max_readers, Some(1000)); + assert_eq!(builder.max_dbs, Some(50)); + assert_eq!(builder.version.inner(), 42); + } +} diff --git a/crates/storage/db/src/mdbx/mod.rs b/crates/storage/db/src/mdbx/mod.rs index bbd550d75..e43783c45 100644 --- a/crates/storage/db/src/mdbx/mod.rs +++ b/crates/storage/db/src/mdbx/mod.rs @@ -2,11 +2,8 @@ //! //! The code is adapted from `reth` mdbx implementation: -pub mod cursor; -pub mod stats; -pub mod tx; - use std::collections::HashMap; +use std::ops::Range; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -16,27 +13,98 @@ use libmdbx::{DatabaseFlags, EnvironmentFlags, Geometry, Mode, PageSize, SyncMod use metrics::{describe_gauge, Label}; use tracing::error; -use self::stats::{Stats, TableStat}; -use self::tx::Tx; use crate::abstraction::Database; use crate::error::DatabaseError; use crate::tables::{TableType, Tables, NUM_TABLES}; use crate::utils; +use crate::version::Version; use crate::version::CURRENT_DB_VERSION; +pub mod builder; +pub mod cursor; +pub mod stats; +pub mod tx; + +use self::stats::{Stats, TableStat}; +use self::tx::Tx; + const GIGABYTE: usize = 1024 * 1024 * 1024; const TERABYTE: usize = GIGABYTE * 1024; /// MDBX allows up to 32767 readers (`MDBX_READERS_LIMIT`), but we limit it to slightly below that const DEFAULT_MAX_READERS: u64 = 32_000; -/// Environment used when opening a MDBX environment. RO/RW. +// /// Environment used when opening a MDBX environment. RO/RW. +// #[derive(Debug)] +// pub enum DbEnvKind { +// /// Read-only MDBX environment. +// RO, +// /// Read-write MDBX environment. +// RW, +// } + +/// Builder for configuring and creating a [`DbEnv`]. #[derive(Debug)] -pub enum DbEnvKind { - /// Read-only MDBX environment. - RO, - /// Read-write MDBX environment. - RW, +pub struct DbEnvBuilder { + max_readers: u64, + geometry: Option>>, +} + +impl DbEnvBuilder { + /// Creates a new builder with default settings for the specified environment kind. + pub fn new() -> Self { + Self { flags: None, geometry: None, max_readers: DEFAULT_MAX_READERS } + } + + /// Sets the database geometry configuration. + pub fn geometry(mut self, geometry: Geometry>) -> Self { + self.geometry = Some(geometry); + self + } + + /// Sets the environment flags. + pub fn flags(mut self, flags: EnvironmentFlags) -> Self { + self.flags = Some(flags); + self + } + + /// Sets the maximum number of readers. + pub fn max_readers(mut self, max_readers: u64) -> Self { + self.max_readers = max_readers; + self + } + + /// Builds the database environment at the specified path. + pub fn build( + self, + path: impl AsRef, + mode: libmdbx::Mode, + ) -> Result { + let mut builder = libmdbx::Environment::builder(); + + builder + .set_max_dbs(Tables::ALL.len()) + .set_geometry(self.geometry.unwrap_or_else(|| Geometry { + size: Some(0..(TERABYTE)), + growth_step: Some(4 * GIGABYTE as isize), + shrink_threshold: None, + page_size: Some(PageSize::Set(utils::default_page_size())), + })) + .set_flags(self.flags.unwrap_or_else(|| EnvironmentFlags { + mode, + no_rdahead: true, + coalesce: true, + ..Default::default() + })) + .set_max_readers(self.max_readers.unwrap_or(DEFAULT_MAX_READERS)); + + let env = builder.open(path.as_ref()).map_err(DatabaseError::OpenEnv)?; + let dir = path.as_ref().to_path_buf(); + + let inner = DbEnvInner { env, dir, ephemeral: self.ephemeral, version: self.version }; + + Ok(DbEnv { inner: std::sync::Arc::new(inner) }.with_metrics()) + } } /// Wrapper for `libmdbx-sys` environment. @@ -46,23 +114,23 @@ pub struct DbEnv { } #[derive(Debug)] -struct DbEnvInner { +pub(super) struct DbEnvInner { /// The handle to the MDBX environment. - env: libmdbx::Environment, + pub(super) env: libmdbx::Environment, /// The path where the database environemnt is stored at. - dir: PathBuf, + pub(super) dir: PathBuf, /// A flag inidicating whether the database is ephemeral or not. If `true`, the database will /// be deleted when the environment is dropped. - ephemeral: bool, + pub(super) ephemeral: bool, /// The database schema version. - version: u32, + pub(super) version: Version, } impl DbEnv { /// Opens the database at the specified path with the given `EnvKind`. /// /// It does not create the tables, for that call [`DbEnv::create_tables`]. - pub fn open(path: impl AsRef, kind: DbEnvKind, version: u32) -> Result { + pub fn open(path: impl AsRef, kind: DbEnvKind) -> Result { let mode = match kind { DbEnvKind::RO => Mode::ReadOnly, DbEnvKind::RW => Mode::ReadWrite { sync_mode: SyncMode::Durable }, @@ -92,7 +160,7 @@ impl DbEnv { let env = builder.open(path.as_ref()).map_err(DatabaseError::OpenEnv)?; let dir = path.as_ref().to_path_buf(); - let inner = DbEnvInner { env, dir, ephemeral: false, version }; + let inner = DbEnvInner { env, dir, ephemeral: false, version: CURRENT_DB_VERSION }; Ok(Self { inner: Arc::new(inner) }.with_metrics()) } @@ -153,11 +221,11 @@ impl DbEnv { } /// Returns the database schema version. - pub fn version(&self) -> u32 { + pub fn version(&self) -> Version { self.inner.version } - fn with_metrics(self) -> Self { + pub(super) fn with_metrics(self) -> Self { describe_gauge!("db.table_size", metrics::Unit::Bytes, "Total size of the table"); describe_gauge!("db.table_pages", metrics::Unit::Count, "Number of pages in the table"); describe_gauge!("db.table_entries", metrics::Unit::Count, "Number of entries in the table"); @@ -248,13 +316,13 @@ impl katana_metrics::Report for DbEnv { pub mod test_utils { use super::DbEnv; - use crate::init_ephemeral_db; + use crate::Db; const ERROR_DB_CREATION: &str = "Not able to create the mdbx file."; /// Create ephemeral database for testing pub fn create_test_db() -> DbEnv { - init_ephemeral_db().expect(ERROR_DB_CREATION) + Db::in_memory().expect(ERROR_DB_CREATION).env } } diff --git a/crates/storage/db/src/version.rs b/crates/storage/db/src/version.rs index 296c96541..183fbba16 100644 --- a/crates/storage/db/src/version.rs +++ b/crates/storage/db/src/version.rs @@ -5,7 +5,7 @@ use std::mem; use std::path::{Path, PathBuf}; /// Current version of the database. -pub const CURRENT_DB_VERSION: u32 = 7; +pub const CURRENT_DB_VERSION: Version = Version::new(7); /// Name of the version file. const DB_VERSION_FILE_NAME: &str = "db.version"; @@ -22,6 +22,19 @@ pub enum DatabaseVersionError { MismatchVersion { expected: u32, found: u32 }, } +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Version(u32); + +impl Version { + pub const fn new(version: u32) -> Self { + Version(version) + } + + pub const fn inner(self) -> u32 { + self.0 + } +} + /// Insert a version file at the given `path` with the specified `version`. If the `path` is a /// directory, the version file will be created inside it. Otherwise, the version file will be /// created exactly at `path`. @@ -52,7 +65,7 @@ pub(super) fn create_db_version_file( pub(super) fn check_db_version(path: impl AsRef) -> Result<(), DatabaseVersionError> { let version = get_db_version(path)?; if version != CURRENT_DB_VERSION { - Err(DatabaseVersionError::MismatchVersion { expected: CURRENT_DB_VERSION, found: version }) + Err(DatabaseVersionError::MismatchVersion { expected: CURRENT_DB_VERSION.inner(), found: version.inner() }) } else { Ok(()) } @@ -60,11 +73,11 @@ pub(super) fn check_db_version(path: impl AsRef) -> Result<(), DatabaseVer /// Check if database version is compatible for block data access. pub(super) fn is_block_compatible_version(version: u32) -> bool { - (5..=CURRENT_DB_VERSION).contains(&version) + (5..=CURRENT_DB_VERSION.inner()).contains(&version) } /// Get the version of the database at the given `path`. -pub fn get_db_version(path: impl AsRef) -> Result { +pub fn get_db_version(path: impl AsRef) -> Result { let path = path.as_ref(); let path = if path.is_dir() { default_version_file_path(path) } else { path.to_path_buf() }; @@ -73,7 +86,7 @@ pub fn get_db_version(path: impl AsRef) -> Result()]>::try_from(buf.as_slice())?; - Ok(u32::from_be_bytes(bytes)) + Ok(Version(u32::from_be_bytes(bytes))) } pub(super) fn default_version_file_path(path: &Path) -> PathBuf { @@ -86,6 +99,6 @@ mod tests { #[test] fn test_current_version() { use super::CURRENT_DB_VERSION; - assert_eq!(CURRENT_DB_VERSION, 7, "Invalid current database version") + assert_eq!(CURRENT_DB_VERSION.inner(), 7, "Invalid current database version") } } From f5f7d36c095c03196683acc457a95d0df0867c69 Mon Sep 17 00:00:00 2001 From: Ammar Arif Date: Wed, 2 Jul 2025 19:54:52 +0800 Subject: [PATCH 3/7] wip --- .../src/implementation/blockifier/mod.rs | 2 ++ crates/storage/db/src/mdbx/mod.rs | 33 +++++++++---------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/crates/executor/src/implementation/blockifier/mod.rs b/crates/executor/src/implementation/blockifier/mod.rs index ab54fcb90..3d7deee1d 100644 --- a/crates/executor/src/implementation/blockifier/mod.rs +++ b/crates/executor/src/implementation/blockifier/mod.rs @@ -237,6 +237,8 @@ impl<'a> BlockExecutor<'a> for StarknetVMProcessor<'a> { receipt.resources_used().computation_resources.n_steps as u128; if let Some(reason) = receipt.revert_reason() { + println!("tx reverted: {reason}"); + break; info!(target: LOG_TARGET, hash = format!("{hash:#x}"), %reason, "Transaction reverted."); } diff --git a/crates/storage/db/src/mdbx/mod.rs b/crates/storage/db/src/mdbx/mod.rs index e43783c45..04f52fc04 100644 --- a/crates/storage/db/src/mdbx/mod.rs +++ b/crates/storage/db/src/mdbx/mod.rs @@ -47,26 +47,25 @@ const DEFAULT_MAX_READERS: u64 = 32_000; #[derive(Debug)] pub struct DbEnvBuilder { max_readers: u64, - geometry: Option>>, } impl DbEnvBuilder { /// Creates a new builder with default settings for the specified environment kind. pub fn new() -> Self { - Self { flags: None, geometry: None, max_readers: DEFAULT_MAX_READERS } + Self { max_readers: DEFAULT_MAX_READERS } } - /// Sets the database geometry configuration. - pub fn geometry(mut self, geometry: Geometry>) -> Self { - self.geometry = Some(geometry); - self - } + // /// Sets the database geometry configuration. + // pub fn geometry(mut self, geometry: Geometry>) -> Self { + // self.geometry = Some(geometry); + // self + // } - /// Sets the environment flags. - pub fn flags(mut self, flags: EnvironmentFlags) -> Self { - self.flags = Some(flags); - self - } + // /// Sets the environment flags. + // pub fn flags(mut self, flags: EnvironmentFlags) -> Self { + // self.flags = Some(flags); + // self + // } /// Sets the maximum number of readers. pub fn max_readers(mut self, max_readers: u64) -> Self { @@ -84,19 +83,19 @@ impl DbEnvBuilder { builder .set_max_dbs(Tables::ALL.len()) - .set_geometry(self.geometry.unwrap_or_else(|| Geometry { + .set_geometry(Geometry { size: Some(0..(TERABYTE)), growth_step: Some(4 * GIGABYTE as isize), shrink_threshold: None, page_size: Some(PageSize::Set(utils::default_page_size())), - })) - .set_flags(self.flags.unwrap_or_else(|| EnvironmentFlags { + }) + .set_flags(EnvironmentFlags { mode, no_rdahead: true, coalesce: true, ..Default::default() - })) - .set_max_readers(self.max_readers.unwrap_or(DEFAULT_MAX_READERS)); + }) + .set_max_readers(self.max_readers); let env = builder.open(path.as_ref()).map_err(DatabaseError::OpenEnv)?; let dir = path.as_ref().to_path_buf(); From dd8e0fd4c83542ef1ffd0089331a802935f9d599 Mon Sep 17 00:00:00 2001 From: Ammar Arif Date: Wed, 2 Jul 2025 21:51:01 +0800 Subject: [PATCH 4/7] wip --- bin/katana/src/cli/db/mod.rs | 26 +-- bin/katana/tests/fixtures.rs | 7 +- crates/chain-spec/src/rollup/utils.rs | 4 +- crates/core/benches/commit.rs | 4 +- crates/core/src/backend/storage.rs | 5 +- .../core/src/service/block_producer_tests.rs | 2 +- crates/core/tests/backend.rs | 6 +- .../src/implementation/blockifier/mod.rs | 2 - crates/executor/tests/fixtures/mod.rs | 2 +- crates/node/src/lib.rs | 12 +- crates/storage/db/src/lib.rs | 128 ++++++++++---- crates/storage/db/src/mdbx/builder.rs | 37 ---- crates/storage/db/src/mdbx/mod.rs | 159 +++--------------- crates/storage/db/src/version.rs | 33 ++-- .../storage/provider/src/providers/db/mod.rs | 14 +- .../provider/src/providers/fork/mod.rs | 7 +- crates/storage/provider/src/test_utils.rs | 3 +- crates/storage/provider/tests/fixtures.rs | 4 +- 18 files changed, 171 insertions(+), 284 deletions(-) delete mode 100644 crates/storage/db/src/mdbx/builder.rs diff --git a/bin/katana/src/cli/db/mod.rs b/bin/katana/src/cli/db/mod.rs index df5c447ac..71af8fdb0 100644 --- a/bin/katana/src/cli/db/mod.rs +++ b/bin/katana/src/cli/db/mod.rs @@ -1,12 +1,10 @@ use std::path::{self}; -use anyhow::{Context, Result}; +use anyhow::Result; use clap::{Args, Subcommand}; use comfy_table::modifiers::UTF8_ROUND_CORNERS; use comfy_table::presets::UTF8_FULL; use comfy_table::Table; -use katana_db::mdbx::{DbEnv, DbEnvKind}; -use katana_db::version::get_db_version; mod prune; mod stats; @@ -44,22 +42,16 @@ impl DbArgs { /// /// The path is expanded and resolved to an absolute path before opening the database for clearer /// error messages. -pub fn open_db_ro(path: &str) -> Result { - let path = path::absolute(shellexpand::full(path)?.into_owned())?; - let version = get_db_version(&path).with_context(|| { - format!("Reading database version from path {}", path.display()) - })?; - DbEnv::open(&path, DbEnvKind::RO, version).with_context(|| { - format!("Opening database file in read-only mode at path {}", path.display()) - }) +pub fn open_db_ro(path: &str) -> Result { + katana_db::Db::open_ro(&path::absolute(shellexpand::full(path)?.into_owned())?) } -pub fn open_db_rw(path: &str) -> Result { - let path = path::absolute(shellexpand::full(path)?.into_owned())?; - let version = get_db_version(&path).with_context(|| { - format!("Reading database version from path {}", path.display()) - })?; - katana_db::open_db(path, version) +/// Open the database at `path` in read-write mode. +/// +/// The path is expanded and resolved to an absolute path before opening the database for clearer +/// error messages. +pub fn open_db_rw(path: &str) -> Result { + katana_db::Db::open(&path::absolute(shellexpand::full(path)?.into_owned())?) } /// Create a table with the default UTF-8 full border and rounded corners. diff --git a/bin/katana/tests/fixtures.rs b/bin/katana/tests/fixtures.rs index e75b279af..979155bfa 100644 --- a/bin/katana/tests/fixtures.rs +++ b/bin/katana/tests/fixtures.rs @@ -1,6 +1,5 @@ use std::collections::{BTreeMap, BTreeSet}; -use katana_db::mdbx::DbEnv; use katana_primitives::block::{Block, BlockHash, FinalityStatus}; use katana_primitives::class::{ClassHash, CompiledClassHash}; use katana_primitives::contract::{ContractAddress, Nonce, StorageKey, StorageValue}; @@ -20,7 +19,7 @@ pub struct TempDb { impl TempDb { pub fn new() -> Self { let temp_dir = tempfile::tempdir().expect("failed to create temp dir"); - katana_db::init_db(temp_dir.path()).expect("failed to initialize database"); + katana_db::Db::new(temp_dir.path()).expect("failed to initialize database"); Self { temp_dir } } @@ -32,11 +31,11 @@ impl TempDb { DbProvider::new(self.open_rw()) } - fn open_ro(&self) -> DbEnv { + fn open_ro(&self) -> katana_db::Db { katana::cli::db::open_db_ro(self.path_str()).unwrap() } - fn open_rw(&self) -> DbEnv { + fn open_rw(&self) -> katana_db::Db { katana::cli::db::open_db_rw(self.path_str()).unwrap() } diff --git a/crates/chain-spec/src/rollup/utils.rs b/crates/chain-spec/src/rollup/utils.rs index dc54c4be4..cf8a0a23c 100644 --- a/crates/chain-spec/src/rollup/utils.rs +++ b/crates/chain-spec/src/rollup/utils.rs @@ -409,7 +409,7 @@ mod tests { fn valid_transactions() { let chain_spec = chain_spec(1, true); - let provider = DbProvider::new_ephemeral(); + let provider = DbProvider::new_in_memory(); let ef = executor(&chain_spec); let mut executor = ef.with_state(provider.latest().unwrap()); @@ -426,7 +426,7 @@ mod tests { fn genesis_states() { let chain_spec = chain_spec(1, true); - let provider = DbProvider::new_ephemeral(); + let provider = DbProvider::new_in_memory(); let ef = executor(&chain_spec); let mut executor = ef.with_state(provider.latest().unwrap()); diff --git a/crates/core/benches/commit.rs b/crates/core/benches/commit.rs index 8c1b58236..03d10c66d 100644 --- a/crates/core/benches/commit.rs +++ b/crates/core/benches/commit.rs @@ -142,7 +142,7 @@ fn commit_small(c: &mut Criterion) { small_transactions, small_receipts.as_slice(), &small_state_updates, - DbProvider::new_ephemeral(), + DbProvider::new_in_memory(), ); c.bench_function("Serial", |b| { @@ -168,7 +168,7 @@ fn commit_big(c: &mut Criterion) { big_transactions, big_receipts.as_slice(), &big_state_updates, - DbProvider::new_ephemeral(), + DbProvider::new_in_memory(), ); c.bench_function("Serial", |b| { diff --git a/crates/core/src/backend/storage.rs b/crates/core/src/backend/storage.rs index 4a3eaa455..3d2715953 100644 --- a/crates/core/src/backend/storage.rs +++ b/crates/core/src/backend/storage.rs @@ -1,7 +1,6 @@ use std::sync::Arc; use anyhow::{anyhow, bail, Context, Result}; -use katana_db::mdbx::DbEnv; use katana_primitives::block::{ BlockHashOrNumber, BlockIdOrTag, BlockNumber, FinalityStatus, GasPrice, SealedBlockWithStatus, }; @@ -85,13 +84,13 @@ impl Blockchain { } /// Creates a new [Blockchain] from a database at `path` and `genesis` state. - pub fn new_with_db(db: DbEnv) -> Self { + pub fn new_with_db(db: katana_db::Db) -> Self { Self::new(DbProvider::new(db)) } /// Builds a new blockchain with a forked block. pub async fn new_from_forked( - db: DbEnv, + db: katana_db::Db, fork_url: Url, fork_block: Option, chain: &mut katana_chain_spec::dev::ChainSpec, diff --git a/crates/core/src/service/block_producer_tests.rs b/crates/core/src/service/block_producer_tests.rs index 25c91eb10..46314f0c4 100644 --- a/crates/core/src/service/block_producer_tests.rs +++ b/crates/core/src/service/block_producer_tests.rs @@ -13,7 +13,7 @@ use crate::backend::storage::Blockchain; fn test_backend() -> Arc> { let chain_spec = Arc::new(ChainSpec::dev()); let executor_factory = NoopExecutorFactory::new(); - let blockchain = Blockchain::new(DbProvider::new_ephemeral()); + let blockchain = Blockchain::new(DbProvider::new_in_memory()); let gas_oracle = GasOracle::fixed(Default::default(), Default::default()); let backend = Arc::new(Backend::new(chain_spec, blockchain, gas_oracle, executor_factory)); backend.init_genesis().expect("failed to initialize genesis"); diff --git a/crates/core/tests/backend.rs b/crates/core/tests/backend.rs index b978971ab..c59d084ef 100644 --- a/crates/core/tests/backend.rs +++ b/crates/core/tests/backend.rs @@ -33,7 +33,7 @@ fn executor(chain_spec: &ChainSpec) -> BlockifierFactory { } fn backend(chain_spec: &ChainSpec) -> Backend { - backend_with_db(chain_spec, DbProvider::new_ephemeral()) + backend_with_db(chain_spec, DbProvider::new_in_memory()) } fn backend_with_db(chain_spec: &ChainSpec, provider: impl Database) -> Backend { @@ -83,7 +83,7 @@ fn can_initialize_genesis(#[case] chain: ChainSpec) { #[case::dev(ChainSpec::Dev(dev_chain_spec()))] #[case::rollup(ChainSpec::Rollup(rollup_chain_spec()))] fn can_reinitialize_genesis(#[case] chain: ChainSpec) { - let db = DbProvider::new_ephemeral(); + let db = DbProvider::new_in_memory(); let backend = backend_with_db(&chain, db.clone()); backend.init_genesis().expect("failed to initialize genesis"); @@ -94,7 +94,7 @@ fn can_reinitialize_genesis(#[case] chain: ChainSpec) { #[test] fn reinitialize_with_different_rollup_chain_spec() { - let db = DbProvider::new_ephemeral(); + let db = DbProvider::new_in_memory(); let chain1 = ChainSpec::Rollup(rollup_chain_spec()); let backend1 = backend_with_db(&chain1, db.clone()); diff --git a/crates/executor/src/implementation/blockifier/mod.rs b/crates/executor/src/implementation/blockifier/mod.rs index 3d7deee1d..ab54fcb90 100644 --- a/crates/executor/src/implementation/blockifier/mod.rs +++ b/crates/executor/src/implementation/blockifier/mod.rs @@ -237,8 +237,6 @@ impl<'a> BlockExecutor<'a> for StarknetVMProcessor<'a> { receipt.resources_used().computation_resources.n_steps as u128; if let Some(reason) = receipt.revert_reason() { - println!("tx reverted: {reason}"); - break; info!(target: LOG_TARGET, hash = format!("{hash:#x}"), %reason, "Transaction reverted."); } diff --git a/crates/executor/tests/fixtures/mod.rs b/crates/executor/tests/fixtures/mod.rs index 898a33170..e1fc07c98 100644 --- a/crates/executor/tests/fixtures/mod.rs +++ b/crates/executor/tests/fixtures/mod.rs @@ -72,7 +72,7 @@ pub fn state_provider(chain: &ChainSpec) -> Box { let ChainSpec::Dev(chain) = chain else { panic!("should be dev chain spec") }; let states = chain.state_updates(); - let provider = DbProvider::new_ephemeral(); + let provider = DbProvider::new_in_memory(); let block = SealedBlockWithStatus { status: FinalityStatus::AcceptedOnL2, diff --git a/crates/node/src/lib.rs b/crates/node/src/lib.rs index f85d63f7d..94cbd88c5 100644 --- a/crates/node/src/lib.rs +++ b/crates/node/src/lib.rs @@ -25,7 +25,7 @@ use katana_core::constants::{ }; use katana_core::env::BlockContextGenerator; use katana_core::service::block_producer::BlockProducer; -use katana_db::mdbx::DbEnv; +use katana_db::Db; use katana_executor::implementation::blockifier::cache::ClassCache; use katana_executor::implementation::blockifier::BlockifierFactory; use katana_executor::ExecutionFlags; @@ -63,7 +63,7 @@ use crate::exit::NodeStoppedFuture; pub struct Node { config: Arc, pool: TxPool, - db: DbEnv, + db: katana_db::Db, rpc_server: RpcServer, task_manager: TaskManager, backend: Arc>, @@ -138,7 +138,7 @@ impl Node { return Err(anyhow::anyhow!("Forking is only supported in dev mode for now")); }; - let db = katana_db::init_ephemeral_db()?; + let db = katana_db::Db::in_memory()?; let (bc, block_num) = Blockchain::new_from_forked(db.clone(), cfg.url.clone(), cfg.block, chain_spec) .await?; @@ -149,10 +149,10 @@ impl Node { (bc, db, Some(forked_client)) } else if let Some(db_path) = &config.db.dir { - let db = katana_db::init_db(db_path)?; + let db = katana_db::Db::new(db_path)?; (Blockchain::new_with_db(db.clone()), db, None) } else { - let db = katana_db::init_ephemeral_db()?; + let db = katana_db::Db::in_memory()?; (Blockchain::new_with_db(db.clone()), db, None) }; @@ -371,7 +371,7 @@ impl Node { } /// Returns a reference to the node's database environment (if any). - pub fn db(&self) -> &DbEnv { + pub fn db(&self) -> &Db { &self.db } diff --git a/crates/storage/db/src/lib.rs b/crates/storage/db/src/lib.rs index fb3be1f77..0d1ba6e54 100644 --- a/crates/storage/db/src/lib.rs +++ b/crates/storage/db/src/lib.rs @@ -5,6 +5,7 @@ use std::fs; use std::path::Path; +use abstraction::Database; use anyhow::{anyhow, Context}; pub mod abstraction; @@ -18,11 +19,13 @@ pub mod trie; pub mod utils; pub mod version; -use mdbx::{DbEnv, DbEnvKind}; +use error::DatabaseError; +use mdbx::{DbEnv, DbEnvBuilder}; +use tracing::debug; use utils::is_database_empty; use version::{ - check_db_version, create_db_version_file, get_db_version, is_block_compatible_version, - DatabaseVersionError, CURRENT_DB_VERSION, + create_db_version_file, get_db_version, is_block_compatible_version, DatabaseVersionError, + CURRENT_DB_VERSION, }; #[derive(Debug, Clone)] @@ -37,45 +40,45 @@ impl Db { /// /// This will create the default tables, if necessary. pub fn new>(path: P) -> anyhow::Result { - if is_database_empty(path.as_ref()) { + let version = if is_database_empty(path.as_ref()) { fs::create_dir_all(&path).with_context(|| { format!("Creating database directory at path {}", path.as_ref().display()) })?; - create_db_version_file(&path, CURRENT_DB_VERSION.inner()).with_context(|| { + + create_db_version_file(&path, CURRENT_DB_VERSION).with_context(|| { format!("Inserting database version file at path {}", path.as_ref().display()) })? } else { - match check_db_version(&path) { - Ok(_) => {} - Err(DatabaseVersionError::MismatchVersion { found, .. }) => { - if is_block_compatible_version(found) { - println!("Using database version {} with block compatibility mode", found); - } else { + match get_db_version(&path) { + Ok(version) if version != CURRENT_DB_VERSION => { + if !is_block_compatible_version(&version) { return Err(anyhow!(DatabaseVersionError::MismatchVersion { - expected: CURRENT_DB_VERSION.inner(), - found + expected: CURRENT_DB_VERSION, + found: version })); } + debug!(target: "db", "Using database version {version} with block compatibility mode"); + version } + + Ok(version) => version, + Err(DatabaseVersionError::FileNotFound) => { - create_db_version_file(&path, CURRENT_DB_VERSION.inner()).with_context(|| { + create_db_version_file(&path, CURRENT_DB_VERSION).with_context(|| { format!( "No database version file found. Inserting version file at path {}", path.as_ref().display() ) })? } + Err(err) => return Err(anyhow!(err)), } - } + }; - // After ensuring the version file exists, get the actual version from the file - let version = get_db_version(&path).with_context(|| { - format!("Reading database version from path {}", path.as_ref().display()) - })?; + let env = DbEnvBuilder::new().write().build(path)?; + env.create_default_tables()?; - let env = mdbx::DbEnv::open(path, mdbx::DbEnvKind::RW)?; - env.create_tables()?; Ok(Self { env, version }) } @@ -86,24 +89,83 @@ impl Db { /// provider. Mainly to avoid having two separate implementations for the in-memory and /// persistent db. Simplifying it to using a single solid implementation. /// - /// As such, this database environment will trade off durability for write performance and shouldn't - /// be used in the case where data persistence is required. For that, use [`init_db`]. + /// As such, this database environment will trade off durability for write performance and + /// shouldn't be used in the case where data persistence is required. For that, use + /// [`init_db`]. pub fn in_memory() -> anyhow::Result { - let env = DbEnv::open_ephemeral().context("Opening ephemeral database")?; - env.create_tables()?; + let dir = tempfile::Builder::new().keep(true).tempdir()?; + let path = dir.path(); + + let env = mdbx::DbEnvBuilder::new().sync(libmdbx::SyncMode::UtterlyNoSync).build(path)?; + env.create_default_tables()?; + Ok(Self { env, version: CURRENT_DB_VERSION }) } - /// Open the database at the given `path` in read-write mode. + // Open the database at the given `path` in read-write mode. pub fn open>(path: P) -> anyhow::Result { - let env = DbEnv::open(path.as_ref(), DbEnvKind::RW).with_context(|| { - format!("Opening database in read-write mode at path {}", path.as_ref().display()) - })?; - let version = get_db_version(path.as_ref()).with_context(|| { - format!("Getting database version at path {}", path.as_ref().display()) - })?; + Self::open_inner(path, false) + } + + // Open the database at the given `path` in read-write mode. + pub fn open_ro>(path: P) -> anyhow::Result { + Self::open_inner(path, true) + } + + fn open_inner>(path: P, read_only: bool) -> anyhow::Result { + let path = path.as_ref(); + let builder = DbEnvBuilder::new(); + + let env = if read_only { + builder.build(path).with_context(|| { + format!("Opening database in read-only mode at path {}", path.display()) + })? + } else { + builder.write().build(path).with_context(|| { + format!("Opening database in read-write mode at path {}", path.display()) + })? + }; + + let version = get_db_version(path) + .with_context(|| format!("Getting database version at path {}", path.display()))?; + Ok(Self { env, version }) } + + pub fn require_migration(&self) -> bool { + self.version != CURRENT_DB_VERSION + } + + pub fn path(&self) -> &Path { + self.env.path() + } +} + +/// Main persistent database trait. The database implementation must be transactional. +impl Database for Db { + type Tx = ::Tx; + type TxMut = ::TxMut; + type Stats = ::Stats; + + #[track_caller] + fn tx(&self) -> Result { + self.env.tx() + } + + #[track_caller] + fn tx_mut(&self) -> Result { + self.env.tx_mut() + } + + fn stats(&self) -> Result { + self.env.stats() + } +} + +impl katana_metrics::Report for Db { + fn report(&self) { + self.env.report() + } } #[cfg(test)] @@ -112,7 +174,7 @@ mod tests { use std::fs; use crate::version::{default_version_file_path, get_db_version, CURRENT_DB_VERSION}; - use crate::{Db}; + use crate::Db; #[test] fn initialize_db_in_empty_dir() { diff --git a/crates/storage/db/src/mdbx/builder.rs b/crates/storage/db/src/mdbx/builder.rs deleted file mode 100644 index 98c404b9d..000000000 --- a/crates/storage/db/src/mdbx/builder.rs +++ /dev/null @@ -1,37 +0,0 @@ -//! Builder for [`DbEnv`]. - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn builder_default() { - let builder = DbEnvBuilder::default(); - assert!(matches!(builder.kind, DbEnvKind::RW)); - assert!(!builder.ephemeral); - assert_eq!(builder.version.inner(), CURRENT_DB_VERSION.inner()); - } - - #[test] - fn builder_ephemeral() { - let db = DbEnvBuilder::new(DbEnvKind::RW) - .ephemeral() - .build_ephemeral() - .expect("Failed to create ephemeral database"); - - assert!(db.path().exists()); - } - - #[test] - fn builder_custom_settings() { - let builder = DbEnvBuilder::new(DbEnvKind::RO) - .max_readers(1000) - .max_dbs(50) - .with_version(Version::new(42)); - - assert!(matches!(builder.kind, DbEnvKind::RO)); - assert_eq!(builder.max_readers, Some(1000)); - assert_eq!(builder.max_dbs, Some(50)); - assert_eq!(builder.version.inner(), 42); - } -} diff --git a/crates/storage/db/src/mdbx/mod.rs b/crates/storage/db/src/mdbx/mod.rs index 04f52fc04..93a8e823a 100644 --- a/crates/storage/db/src/mdbx/mod.rs +++ b/crates/storage/db/src/mdbx/mod.rs @@ -1,15 +1,10 @@ -//! MDBX backend for the database. -//! -//! The code is adapted from `reth` mdbx implementation: - use std::collections::HashMap; -use std::ops::Range; use std::path::{Path, PathBuf}; use std::sync::Arc; use katana_metrics::metrics::gauge; pub use libmdbx; -use libmdbx::{DatabaseFlags, EnvironmentFlags, Geometry, Mode, PageSize, SyncMode, RO, RW}; +use libmdbx::{DatabaseFlags, EnvironmentFlags, Geometry, PageSize, SyncMode, RO, RW}; use metrics::{describe_gauge, Label}; use tracing::error; @@ -17,10 +12,7 @@ use crate::abstraction::Database; use crate::error::DatabaseError; use crate::tables::{TableType, Tables, NUM_TABLES}; use crate::utils; -use crate::version::Version; -use crate::version::CURRENT_DB_VERSION; -pub mod builder; pub mod cursor; pub mod stats; pub mod tx; @@ -34,51 +26,37 @@ const TERABYTE: usize = GIGABYTE * 1024; /// MDBX allows up to 32767 readers (`MDBX_READERS_LIMIT`), but we limit it to slightly below that const DEFAULT_MAX_READERS: u64 = 32_000; -// /// Environment used when opening a MDBX environment. RO/RW. -// #[derive(Debug)] -// pub enum DbEnvKind { -// /// Read-only MDBX environment. -// RO, -// /// Read-write MDBX environment. -// RW, -// } - /// Builder for configuring and creating a [`DbEnv`]. #[derive(Debug)] pub struct DbEnvBuilder { + mode: libmdbx::Mode, max_readers: u64, } impl DbEnvBuilder { /// Creates a new builder with default settings for the specified environment kind. pub fn new() -> Self { - Self { max_readers: DEFAULT_MAX_READERS } + Self { mode: libmdbx::Mode::ReadOnly, max_readers: DEFAULT_MAX_READERS } } - // /// Sets the database geometry configuration. - // pub fn geometry(mut self, geometry: Geometry>) -> Self { - // self.geometry = Some(geometry); - // self - // } - - // /// Sets the environment flags. - // pub fn flags(mut self, flags: EnvironmentFlags) -> Self { - // self.flags = Some(flags); - // self - // } - /// Sets the maximum number of readers. pub fn max_readers(mut self, max_readers: u64) -> Self { self.max_readers = max_readers; self } + pub fn write(mut self) -> Self { + self.mode = libmdbx::Mode::ReadWrite { sync_mode: SyncMode::Durable }; + self + } + + pub fn sync(mut self, sync_mode: libmdbx::SyncMode) -> Self { + self.mode = libmdbx::Mode::ReadWrite { sync_mode }; + self + } + /// Builds the database environment at the specified path. - pub fn build( - self, - path: impl AsRef, - mode: libmdbx::Mode, - ) -> Result { + pub fn build(self, path: impl AsRef) -> Result { let mut builder = libmdbx::Environment::builder(); builder @@ -90,7 +68,7 @@ impl DbEnvBuilder { page_size: Some(PageSize::Set(utils::default_page_size())), }) .set_flags(EnvironmentFlags { - mode, + mode: self.mode, no_rdahead: true, coalesce: true, ..Default::default() @@ -100,16 +78,20 @@ impl DbEnvBuilder { let env = builder.open(path.as_ref()).map_err(DatabaseError::OpenEnv)?; let dir = path.as_ref().to_path_buf(); - let inner = DbEnvInner { env, dir, ephemeral: self.ephemeral, version: self.version }; + Ok(DbEnv { inner: Arc::new(DbEnvInner { env, dir }) }.with_metrics()) + } +} - Ok(DbEnv { inner: std::sync::Arc::new(inner) }.with_metrics()) +impl Default for DbEnvBuilder { + fn default() -> Self { + Self::new() } } /// Wrapper for `libmdbx-sys` environment. #[derive(Debug, Clone)] pub struct DbEnv { - inner: Arc, + pub(crate) inner: Arc, } #[derive(Debug)] @@ -118,86 +100,11 @@ pub(super) struct DbEnvInner { pub(super) env: libmdbx::Environment, /// The path where the database environemnt is stored at. pub(super) dir: PathBuf, - /// A flag inidicating whether the database is ephemeral or not. If `true`, the database will - /// be deleted when the environment is dropped. - pub(super) ephemeral: bool, - /// The database schema version. - pub(super) version: Version, } impl DbEnv { - /// Opens the database at the specified path with the given `EnvKind`. - /// - /// It does not create the tables, for that call [`DbEnv::create_tables`]. - pub fn open(path: impl AsRef, kind: DbEnvKind) -> Result { - let mode = match kind { - DbEnvKind::RO => Mode::ReadOnly, - DbEnvKind::RW => Mode::ReadWrite { sync_mode: SyncMode::Durable }, - }; - - let mut builder = libmdbx::Environment::builder(); - builder - .set_max_dbs(Tables::ALL.len()) - .set_geometry(Geometry { - // Maximum database size of 1 terabytes - size: Some(0..(TERABYTE)), - // We grow the database in increments of 4 gigabytes - growth_step: Some(4 * GIGABYTE as isize), - // The database never shrinks - shrink_threshold: None, - page_size: Some(PageSize::Set(utils::default_page_size())), - }) - .set_flags(EnvironmentFlags { - mode, - // We disable readahead because it improves performance for linear scans, but - // worsens it for random access (which is our access pattern outside of sync) - no_rdahead: true, - coalesce: true, - ..Default::default() - }) - .set_max_readers(DEFAULT_MAX_READERS); - - let env = builder.open(path.as_ref()).map_err(DatabaseError::OpenEnv)?; - let dir = path.as_ref().to_path_buf(); - let inner = DbEnvInner { env, dir, ephemeral: false, version: CURRENT_DB_VERSION }; - - Ok(Self { inner: Arc::new(inner) }.with_metrics()) - } - - /// Opens an ephemeral database. Temporary database environment whose underlying directory will - /// be deleted when the returned [`DbEnv`] is dropped. - pub fn open_ephemeral() -> Result { - let dir = - tempfile::Builder::new().keep(true).tempdir().expect("failed to create a temp dir"); - let path = dir.path(); - - let mut builder = libmdbx::Environment::builder(); - builder - .set_max_dbs(Tables::ALL.len()) - .set_geometry(Geometry { - size: Some(0..(GIGABYTE * 10)), // 10gb - growth_step: Some((GIGABYTE / 2) as isize), // 512mb - shrink_threshold: None, - page_size: Some(PageSize::Set(utils::default_page_size())), - }) - .set_flags(EnvironmentFlags { - // we dont care about durability here - mode: Mode::ReadWrite { sync_mode: SyncMode::UtterlyNoSync }, - no_rdahead: true, - coalesce: true, - ..Default::default() - }) - .set_max_readers(DEFAULT_MAX_READERS); - - let env = builder.open(path).map_err(DatabaseError::OpenEnv)?; - let dir = path.to_path_buf(); - let inner = DbEnvInner { env, dir, ephemeral: true, version: CURRENT_DB_VERSION }; - - Ok(Self { inner: Arc::new(inner) }.with_metrics()) - } - /// Creates all the defined tables in [`Tables`], if necessary. - pub fn create_tables(&self) -> Result<(), DatabaseError> { + pub fn create_default_tables(&self) -> Result<(), DatabaseError> { let tx = self.inner.env.begin_rw_txn().map_err(DatabaseError::CreateRWTx)?; for table in Tables::ALL { @@ -219,11 +126,6 @@ impl DbEnv { &self.inner.dir } - /// Returns the database schema version. - pub fn version(&self) -> Version { - self.inner.version - } - pub(super) fn with_metrics(self) -> Self { describe_gauge!("db.table_size", metrics::Unit::Bytes, "Total size of the table"); describe_gauge!("db.table_pages", metrics::Unit::Count, "Number of pages in the table"); @@ -325,21 +227,6 @@ pub mod test_utils { } } -impl Drop for DbEnv { - fn drop(&mut self) { - // Try to get a mutable reference, this will return Some if there's only a single reference - // left. - if let Some(inner) = Arc::get_mut(&mut self.inner) { - // And if it is ephemeral, remove the directory. - if inner.ephemeral { - if let Err(e) = std::fs::remove_dir_all(&inner.dir) { - eprintln!("Failed to remove temporary directory: {e}"); - } - } - } - } -} - #[cfg(test)] mod tests { diff --git a/crates/storage/db/src/version.rs b/crates/storage/db/src/version.rs index 183fbba16..4cb7d6fcd 100644 --- a/crates/storage/db/src/version.rs +++ b/crates/storage/db/src/version.rs @@ -1,4 +1,5 @@ use std::array::TryFromSliceError; +use std::fmt::Display; use std::fs::{self}; use std::io::{Read, Write}; use std::mem; @@ -19,7 +20,7 @@ pub enum DatabaseVersionError { #[error("Malformed database version file: {0}")] MalformedContent(#[from] TryFromSliceError), #[error("Database version mismatch. Expected version {expected}, found version {found}.")] - MismatchVersion { expected: u32, found: u32 }, + MismatchVersion { expected: Version, found: Version }, } #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -29,9 +30,11 @@ impl Version { pub const fn new(version: u32) -> Self { Version(version) } +} - pub const fn inner(self) -> u32 { - self.0 +impl Display for Version { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) } } @@ -46,8 +49,8 @@ impl Version { /// Will fail if all the directories in `path` has not already been created. pub(super) fn create_db_version_file( path: impl AsRef, - version: u32, -) -> Result<(), DatabaseVersionError> { + version: Version, +) -> Result { let path = path.as_ref(); let path = if path.is_dir() { default_version_file_path(path) } else { path.to_path_buf() }; @@ -56,24 +59,14 @@ pub(super) fn create_db_version_file( permissions.set_readonly(true); file.set_permissions(permissions)?; - file.write_all(&version.to_be_bytes()).map_err(DatabaseVersionError::Io) -} + file.write_all(&version.0.to_be_bytes()).map_err(DatabaseVersionError::Io)?; -/// Check the version of the database at the given `path`. -/// -/// Returning `Ok` if the version matches with [`CURRENT_DB_VERSION`], otherwise `Err` is returned. -pub(super) fn check_db_version(path: impl AsRef) -> Result<(), DatabaseVersionError> { - let version = get_db_version(path)?; - if version != CURRENT_DB_VERSION { - Err(DatabaseVersionError::MismatchVersion { expected: CURRENT_DB_VERSION.inner(), found: version.inner() }) - } else { - Ok(()) - } + Ok(version) } /// Check if database version is compatible for block data access. -pub(super) fn is_block_compatible_version(version: u32) -> bool { - (5..=CURRENT_DB_VERSION.inner()).contains(&version) +pub(super) fn is_block_compatible_version(version: &Version) -> bool { + (5..=CURRENT_DB_VERSION.0).contains(&version.0) } /// Get the version of the database at the given `path`. @@ -99,6 +92,6 @@ mod tests { #[test] fn test_current_version() { use super::CURRENT_DB_VERSION; - assert_eq!(CURRENT_DB_VERSION.inner(), 7, "Invalid current database version") + assert_eq!(CURRENT_DB_VERSION.0, 7, "Invalid current database version") } } diff --git a/crates/storage/provider/src/providers/db/mod.rs b/crates/storage/provider/src/providers/db/mod.rs index a9c681bc1..7ce968f50 100644 --- a/crates/storage/provider/src/providers/db/mod.rs +++ b/crates/storage/provider/src/providers/db/mod.rs @@ -7,8 +7,6 @@ use std::ops::{Range, RangeInclusive}; use katana_db::abstraction::{Database, DbCursor, DbCursorMut, DbDupSortCursor, DbTx, DbTxMut}; use katana_db::error::DatabaseError; -use katana_db::init_ephemeral_db; -use katana_db::mdbx::DbEnv; use katana_db::models::block::StoredBlockBodyIndices; use katana_db::models::contract::{ ContractClassChange, ContractInfoChangeList, ContractNonceChange, @@ -51,7 +49,7 @@ use crate::ProviderResult; /// A provider implementation that uses a persistent database as the backend. // TODO: remove the default generic type #[derive(Debug, Clone)] -pub struct DbProvider(pub(crate) Db); +pub struct DbProvider(pub(crate) Db); impl DbProvider { /// Creates a new [`DbProvider`] from the given [`DbEnv`]. @@ -65,10 +63,10 @@ impl DbProvider { } } -impl DbProvider { - /// Creates a new [`DbProvider`] using an ephemeral database. - pub fn new_ephemeral() -> Self { - let db = init_ephemeral_db().expect("Failed to initialize ephemeral database"); +impl DbProvider { + /// Creates a new [`DbProvider`] using an in-memory database. + pub fn new_in_memory() -> Self { + let db = katana_db::Db::in_memory().expect("Failed to initialize in-memory database"); Self(db) } } @@ -940,7 +938,7 @@ mod tests { } fn create_db_provider() -> DbProvider { - DbProvider(katana_db::mdbx::test_utils::create_test_db()) + DbProvider::new_in_memory() } #[test] diff --git a/crates/storage/provider/src/providers/fork/mod.rs b/crates/storage/provider/src/providers/fork/mod.rs index c6ff6e5ae..924aab398 100644 --- a/crates/storage/provider/src/providers/fork/mod.rs +++ b/crates/storage/provider/src/providers/fork/mod.rs @@ -3,7 +3,6 @@ use std::ops::{Range, RangeInclusive}; use std::sync::Arc; use katana_db::abstraction::Database; -use katana_db::mdbx::DbEnv; use katana_db::models::block::StoredBlockBodyIndices; use katana_fork::{Backend, BackendClient}; use katana_primitives::block::{ @@ -38,7 +37,7 @@ mod state; mod trie; #[derive(Debug)] -pub struct ForkedProvider { +pub struct ForkedProvider { backend: BackendClient, provider: Arc>, } @@ -59,14 +58,14 @@ impl ForkedProvider { } } -impl ForkedProvider { +impl ForkedProvider { /// Creates a new [`ForkedProvider`] using an ephemeral database. pub fn new_ephemeral( block_id: BlockHashOrNumber, provider: Arc>, ) -> Self { let backend = Backend::new(provider, block_id).expect("failed to create backend"); - let provider = Arc::new(DbProvider::new_ephemeral()); + let provider = Arc::new(DbProvider::new_in_memory()); Self { provider, backend } } } diff --git a/crates/storage/provider/src/test_utils.rs b/crates/storage/provider/src/test_utils.rs index 2a629ce4c..71c80703e 100644 --- a/crates/storage/provider/src/test_utils.rs +++ b/crates/storage/provider/src/test_utils.rs @@ -1,7 +1,6 @@ use std::sync::Arc; use alloy_primitives::U256; -use katana_db::mdbx::test_utils; use katana_primitives::address; use katana_primitives::block::{Block, BlockHash, FinalityStatus}; use katana_primitives::contract::ContractAddress; @@ -17,7 +16,7 @@ use crate::traits::block::BlockWriter; /// Creates a persistent storage provider with initial states loaded for testin. pub fn test_provider() -> DbProvider { - let provider = DbProvider::new(test_utils::create_test_db()); + let provider = DbProvider::new_in_memory(); initialize_test_provider(&provider); provider } diff --git a/crates/storage/provider/tests/fixtures.rs b/crates/storage/provider/tests/fixtures.rs index 82ed3d11b..99ddc1c93 100644 --- a/crates/storage/provider/tests/fixtures.rs +++ b/crates/storage/provider/tests/fixtures.rs @@ -1,6 +1,5 @@ use std::collections::BTreeMap; -use katana_db::mdbx; use katana_primitives::address; use katana_primitives::block::{ BlockHashOrNumber, FinalityStatus, Header, SealedBlock, SealedBlockWithStatus, @@ -68,8 +67,7 @@ pub mod fork { #[rstest::fixture] pub fn db_provider() -> BlockchainProvider { - let env = mdbx::test_utils::create_test_db(); - BlockchainProvider::new(DbProvider::new(env)) + BlockchainProvider::new(DbProvider::new_in_memory()) } #[rstest::fixture] From 014bfc01c7a6e161ea5198662d64e80e2aad201b Mon Sep 17 00:00:00 2001 From: Ammar Arif Date: Wed, 2 Jul 2025 22:18:13 +0800 Subject: [PATCH 5/7] add back ephemeral db geometry --- crates/storage/db/src/lib.rs | 11 +++++++++- crates/storage/db/src/mdbx/mod.rs | 35 ++++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/crates/storage/db/src/lib.rs b/crates/storage/db/src/lib.rs index 0d1ba6e54..ca07aa3b9 100644 --- a/crates/storage/db/src/lib.rs +++ b/crates/storage/db/src/lib.rs @@ -20,6 +20,7 @@ pub mod utils; pub mod version; use error::DatabaseError; +use libmdbx::SyncMode; use mdbx::{DbEnv, DbEnvBuilder}; use tracing::debug; use utils::is_database_empty; @@ -28,6 +29,9 @@ use version::{ CURRENT_DB_VERSION, }; +const GIGABYTE: usize = 1024 * 1024 * 1024; +const TERABYTE: usize = GIGABYTE * 1024; + #[derive(Debug, Clone)] pub struct Db { env: DbEnv, @@ -96,7 +100,12 @@ impl Db { let dir = tempfile::Builder::new().keep(true).tempdir()?; let path = dir.path(); - let env = mdbx::DbEnvBuilder::new().sync(libmdbx::SyncMode::UtterlyNoSync).build(path)?; + let env = mdbx::DbEnvBuilder::new() + .max_size(GIGABYTE * 10) // 10gb + .growth_step((GIGABYTE / 2) as isize) // 512mb + .sync(SyncMode::UtterlyNoSync) + .build(path)?; + env.create_default_tables()?; Ok(Self { env, version: CURRENT_DB_VERSION }) diff --git a/crates/storage/db/src/mdbx/mod.rs b/crates/storage/db/src/mdbx/mod.rs index 93a8e823a..6cef555bf 100644 --- a/crates/storage/db/src/mdbx/mod.rs +++ b/crates/storage/db/src/mdbx/mod.rs @@ -11,7 +11,7 @@ use tracing::error; use crate::abstraction::Database; use crate::error::DatabaseError; use crate::tables::{TableType, Tables, NUM_TABLES}; -use crate::utils; +use crate::{utils, GIGABYTE, TERABYTE}; pub mod cursor; pub mod stats; @@ -20,23 +20,29 @@ pub mod tx; use self::stats::{Stats, TableStat}; use self::tx::Tx; -const GIGABYTE: usize = 1024 * 1024 * 1024; -const TERABYTE: usize = GIGABYTE * 1024; - /// MDBX allows up to 32767 readers (`MDBX_READERS_LIMIT`), but we limit it to slightly below that const DEFAULT_MAX_READERS: u64 = 32_000; +const DEFAULT_MAX_SIZE: usize = TERABYTE; +const DEFAULT_GROWTH_STEP: isize = 4 * GIGABYTE as isize; /// Builder for configuring and creating a [`DbEnv`]. #[derive(Debug)] pub struct DbEnvBuilder { mode: libmdbx::Mode, max_readers: u64, + max_size: usize, + growth_step: isize, } impl DbEnvBuilder { /// Creates a new builder with default settings for the specified environment kind. pub fn new() -> Self { - Self { mode: libmdbx::Mode::ReadOnly, max_readers: DEFAULT_MAX_READERS } + Self { + mode: libmdbx::Mode::ReadOnly, + max_readers: DEFAULT_MAX_READERS, + max_size: DEFAULT_MAX_SIZE, + growth_step: DEFAULT_GROWTH_STEP, + } } /// Sets the maximum number of readers. @@ -55,6 +61,16 @@ impl DbEnvBuilder { self } + pub fn max_size(mut self, max_size: usize) -> Self { + self.max_size = max_size; + self + } + + pub fn growth_step(mut self, growth_step: isize) -> Self { + self.growth_step = growth_step; + self + } + /// Builds the database environment at the specified path. pub fn build(self, path: impl AsRef) -> Result { let mut builder = libmdbx::Environment::builder(); @@ -62,13 +78,18 @@ impl DbEnvBuilder { builder .set_max_dbs(Tables::ALL.len()) .set_geometry(Geometry { - size: Some(0..(TERABYTE)), - growth_step: Some(4 * GIGABYTE as isize), + // Maximum database size of 1 terabytes + size: Some(0..(self.max_size)), + // We grow the database in increments of 4 gigabytes + growth_step: Some(self.growth_step), + // The database never shrinks shrink_threshold: None, page_size: Some(PageSize::Set(utils::default_page_size())), }) .set_flags(EnvironmentFlags { mode: self.mode, + // We disable readahead because it improves performance for linear scans, but + // worsens it for random access (which is our access pattern outside of sync) no_rdahead: true, coalesce: true, ..Default::default() From a79c66e7321dea78fc8747f2777d291791cec360 Mon Sep 17 00:00:00 2001 From: Ammar Arif Date: Wed, 2 Jul 2025 22:26:49 +0800 Subject: [PATCH 6/7] wip --- crates/storage/db/src/lib.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/crates/storage/db/src/lib.rs b/crates/storage/db/src/lib.rs index ca07aa3b9..dccfd4cfb 100644 --- a/crates/storage/db/src/lib.rs +++ b/crates/storage/db/src/lib.rs @@ -26,7 +26,7 @@ use tracing::debug; use utils::is_database_empty; use version::{ create_db_version_file, get_db_version, is_block_compatible_version, DatabaseVersionError, - CURRENT_DB_VERSION, + Version, CURRENT_DB_VERSION, }; const GIGABYTE: usize = 1024 * 1024 * 1024; @@ -35,7 +35,7 @@ const TERABYTE: usize = GIGABYTE * 1024; #[derive(Debug, Clone)] pub struct Db { env: DbEnv, - version: version::Version, + version: Version, } impl Db { @@ -145,6 +145,12 @@ impl Db { self.version != CURRENT_DB_VERSION } + /// Returns the version of the database. + pub fn version(&self) -> Version { + self.version + } + + /// Returns the path to the directory where the database is located. pub fn path(&self) -> &Path { self.env.path() } From f0a3b7bfff3f1813bf7c3f668dd39fd12f3cbfa7 Mon Sep 17 00:00:00 2001 From: Ammar Arif Date: Wed, 2 Jul 2025 23:00:54 +0800 Subject: [PATCH 7/7] fix --- crates/node/src/full/mod.rs | 7 +++---- crates/storage/db/src/lib.rs | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/node/src/full/mod.rs b/crates/node/src/full/mod.rs index 1fd219ba0..3564d858d 100644 --- a/crates/node/src/full/mod.rs +++ b/crates/node/src/full/mod.rs @@ -8,7 +8,6 @@ use std::sync::Arc; use anyhow::Result; use exit::NodeStoppedFuture; -use katana_db::mdbx::DbEnv; use katana_feeder_gateway::client::SequencerGateway; use katana_metrics::exporters::prometheus::PrometheusRecorder; use katana_metrics::{Report, Server as MetricsServer}; @@ -38,7 +37,7 @@ pub struct Config { #[derive(Debug)] pub struct Node { - pub db: DbEnv, + pub db: katana_db::Db, pub pool: TxPool, pub config: Arc, pub task_manager: TaskManager, @@ -62,7 +61,7 @@ impl Node { let path = config.db.dir.clone().expect("database path must exist"); info!(target: "node", path = %path.display(), "Initializing database."); - let db = katana_db::init_db(path)?; + let db = katana_db::Db::new(path)?; let provider = DbProvider::new(db.clone()); @@ -134,7 +133,7 @@ impl Node { #[derive(Debug)] pub struct LaunchedNode { - pub db: DbEnv, + pub db: katana_db::Db, pub pool: TxPool, pub task_manager: TaskManager, pub config: Arc, diff --git a/crates/storage/db/src/lib.rs b/crates/storage/db/src/lib.rs index dccfd4cfb..4d2536017 100644 --- a/crates/storage/db/src/lib.rs +++ b/crates/storage/db/src/lib.rs @@ -252,6 +252,7 @@ mod tests { } #[test] + #[ignore = "unignore once we actually delete the temp directory"] fn ephemeral_db_deletion_on_drop() { // Create an ephemeral database let db = Db::in_memory().expect("failed to create ephemeral database");