Skip to content

Commit 692587a

Browse files
authored
feat(db): database operations metrics (#367)
1 parent 39fbd31 commit 692587a

File tree

3 files changed

+293
-34
lines changed

3 files changed

+293
-34
lines changed
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
//! Metrics collection for database operations.
2+
//!
3+
//! This module provides metrics tracking for:
4+
//! - Transaction lifecycle (creation, commit, abort)
5+
//! - Database operations (get, put, delete, clear)
6+
//! - Operation timing and success rates
7+
8+
use std::sync::Arc;
9+
use std::time::Instant;
10+
11+
use katana_metrics::metrics::{Counter, Histogram};
12+
use katana_metrics::Metrics;
13+
14+
/// Metrics for database operations.
15+
#[derive(Clone, Debug)]
16+
pub struct DbMetrics {
17+
inner: Arc<DbMetricsInner>,
18+
}
19+
20+
impl Default for DbMetrics {
21+
fn default() -> Self {
22+
Self::new()
23+
}
24+
}
25+
26+
impl DbMetrics {
27+
/// Creates a new instance of database metrics.
28+
pub fn new() -> Self {
29+
// database tables metrics
30+
::metrics::describe_gauge!(
31+
"db.table_size",
32+
::metrics::Unit::Bytes,
33+
"Total size of the table"
34+
);
35+
::metrics::describe_gauge!(
36+
"db.table_pages",
37+
::metrics::Unit::Count,
38+
"Number of pages in the table"
39+
);
40+
::metrics::describe_gauge!(
41+
"db.table_entries",
42+
::metrics::Unit::Count,
43+
"Number of entries in the table"
44+
);
45+
::metrics::describe_gauge!(
46+
"db.freelist",
47+
::metrics::Unit::Bytes,
48+
"Size of the database freelist"
49+
);
50+
51+
Self {
52+
inner: Arc::new(DbMetricsInner {
53+
transaction: DbTransactionMetrics::default(),
54+
operations: DbOperationMetrics::default(),
55+
}),
56+
}
57+
}
58+
59+
/// Records a transaction creation.
60+
pub fn record_ro_tx_create(&self) {
61+
self.inner.transaction.ro_created.increment(1);
62+
}
63+
64+
pub fn record_rw_tx_create(&self) {
65+
self.inner.transaction.rw_created.increment(1);
66+
}
67+
68+
/// Records a transaction commit with timing.
69+
///
70+
/// ## Arguments
71+
///
72+
/// * `duration` - Time taken for the get operation in seconds
73+
/// * `success` - Whether the commit operation completed sucessfully or not.
74+
pub fn record_tx_commit(&self, duration: f64, success: bool) {
75+
if success {
76+
self.inner.transaction.commits_successful.increment(1);
77+
} else {
78+
self.inner.transaction.commits_failed.increment(1);
79+
}
80+
81+
self.inner.transaction.commit_time_seconds.record(duration);
82+
}
83+
84+
/// Records a transaction abort.
85+
pub fn record_tx_abort(&self) {
86+
self.inner.transaction.aborts.increment(1);
87+
}
88+
89+
/// Records a get operation.
90+
///
91+
/// ## Arguments
92+
///
93+
/// * `duration` - Time taken for the get operation in seconds
94+
/// * `found` - Whether the requested value was found
95+
pub fn record_get(&self, duration: f64, found: bool) {
96+
if found {
97+
self.inner.operations.get_hits.increment(1);
98+
} else {
99+
self.inner.operations.get_misses.increment(1);
100+
}
101+
102+
self.inner.operations.get_time_seconds.record(duration);
103+
}
104+
105+
/// Records a put operation.
106+
///
107+
/// ## Arguments
108+
///
109+
/// * `duration` - Time taken for the put operation in seconds
110+
pub fn record_put(&self, duration: f64) {
111+
self.inner.operations.puts.increment(1);
112+
self.inner.operations.put_time_seconds.record(duration);
113+
}
114+
115+
/// Records a delete operation.
116+
///
117+
/// ## Arguments
118+
///
119+
/// * `duration` - Time taken for the delete operation in seconds
120+
pub fn record_delete(&self, duration: f64, deleted: bool) {
121+
if deleted {
122+
self.inner.operations.deletes_successful.increment(1);
123+
} else {
124+
self.inner.operations.deletes_failed.increment(1);
125+
}
126+
127+
self.inner.operations.delete_time_seconds.record(duration);
128+
}
129+
130+
/// Records a clear operation.
131+
///
132+
/// ## Arguments
133+
///
134+
/// * `duration` - Time taken for the clear operation in seconds
135+
pub fn record_clear(&self, duration: f64) {
136+
self.inner.operations.clears.increment(1);
137+
self.inner.operations.clear_time_seconds.record(duration);
138+
}
139+
}
140+
141+
#[derive(Debug)]
142+
struct DbMetricsInner {
143+
transaction: DbTransactionMetrics,
144+
operations: DbOperationMetrics,
145+
}
146+
147+
/// Metrics for database transactions.
148+
#[derive(Metrics, Clone)]
149+
#[metrics(scope = "db.transaction")]
150+
struct DbTransactionMetrics {
151+
/// Number of read-only transactions created
152+
ro_created: Counter,
153+
/// Number of read-write transactions created
154+
rw_created: Counter,
155+
/// Number of successful commits
156+
commits_successful: Counter,
157+
/// Number of failed commits
158+
commits_failed: Counter,
159+
/// Number of transaction aborts
160+
aborts: Counter,
161+
/// Time taken to commit a transaction
162+
commit_time_seconds: Histogram,
163+
}
164+
165+
/// Metrics for database operations.
166+
#[derive(Metrics, Clone)]
167+
#[metrics(scope = "db.operation")]
168+
struct DbOperationMetrics {
169+
/// Number of get operations that found a value
170+
get_hits: Counter,
171+
/// Number of get operations that didn't find a value
172+
get_misses: Counter,
173+
/// Time taken for get operations
174+
get_time_seconds: Histogram,
175+
/// Number of put operations
176+
puts: Counter,
177+
/// Time taken for put operations
178+
put_time_seconds: Histogram,
179+
/// Number of successful delete operations
180+
deletes_successful: Counter,
181+
/// Number of failed delete operations
182+
deletes_failed: Counter,
183+
/// Time taken for delete operations
184+
delete_time_seconds: Histogram,
185+
/// Number of clear operations
186+
clears: Counter,
187+
/// Time taken for clear operations
188+
clear_time_seconds: Histogram,
189+
}
190+
191+
/// Helper for timing database operations.
192+
pub(super) struct OpTimer {
193+
start: Instant,
194+
}
195+
196+
impl OpTimer {
197+
/// Starts timing an operation.
198+
pub fn new() -> Self {
199+
Self { start: Instant::now() }
200+
}
201+
202+
/// Returns the elapsed time in seconds.
203+
pub fn elapsed(&self) -> f64 {
204+
self.start.elapsed().as_secs_f64()
205+
}
206+
}

crates/storage/db/src/mdbx/mod.rs

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ use std::sync::Arc;
55
use katana_metrics::metrics::gauge;
66
pub use libmdbx;
77
use libmdbx::{DatabaseFlags, EnvironmentFlags, Geometry, PageSize, SyncMode, RO, RW};
8-
use metrics::{describe_gauge, Label};
98
use tracing::error;
109

1110
use crate::abstraction::Database;
@@ -14,9 +13,11 @@ use crate::tables::{TableType, Tables, NUM_TABLES};
1413
use crate::{utils, GIGABYTE, TERABYTE};
1514

1615
pub mod cursor;
16+
pub mod metrics;
1717
pub mod stats;
1818
pub mod tx;
1919

20+
use self::metrics::DbMetrics;
2021
use self::stats::{Stats, TableStat};
2122
use self::tx::Tx;
2223

@@ -98,8 +99,9 @@ impl DbEnvBuilder {
9899

99100
let env = builder.open(path.as_ref()).map_err(DatabaseError::OpenEnv)?;
100101
let dir = path.as_ref().to_path_buf();
102+
let metrics = DbMetrics::new();
101103

102-
Ok(DbEnv { inner: Arc::new(DbEnvInner { env, dir }) }.with_metrics())
104+
Ok(DbEnv { inner: Arc::new(DbEnvInner { env, dir, metrics }) })
103105
}
104106
}
105107

@@ -110,17 +112,30 @@ impl Default for DbEnvBuilder {
110112
}
111113

112114
/// Wrapper for `libmdbx-sys` environment.
113-
#[derive(Debug, Clone)]
115+
#[derive(Clone)]
114116
pub struct DbEnv {
115117
pub(crate) inner: Arc<DbEnvInner>,
116118
}
117119

118-
#[derive(Debug)]
119120
pub(super) struct DbEnvInner {
120121
/// The handle to the MDBX environment.
121122
pub(super) env: libmdbx::Environment,
122123
/// The path where the database environemnt is stored at.
123124
pub(super) dir: PathBuf,
125+
/// Metrics for database operations.
126+
pub(super) metrics: DbMetrics,
127+
}
128+
129+
impl std::fmt::Debug for DbEnv {
130+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131+
f.debug_struct("DbEnv").field("dir", &self.inner.dir).finish_non_exhaustive()
132+
}
133+
}
134+
135+
impl std::fmt::Debug for DbEnvInner {
136+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
137+
f.debug_struct("DbEnvInner").field("dir", &self.dir).finish_non_exhaustive()
138+
}
124139
}
125140

126141
impl DbEnv {
@@ -146,14 +161,6 @@ impl DbEnv {
146161
pub fn path(&self) -> &Path {
147162
&self.inner.dir
148163
}
149-
150-
pub(super) fn with_metrics(self) -> Self {
151-
describe_gauge!("db.table_size", metrics::Unit::Bytes, "Total size of the table");
152-
describe_gauge!("db.table_pages", metrics::Unit::Count, "Number of pages in the table");
153-
describe_gauge!("db.table_entries", metrics::Unit::Count, "Number of entries in the table");
154-
describe_gauge!("db.freelist", metrics::Unit::Bytes, "Size of the database freelist");
155-
self
156-
}
157164
}
158165

159166
impl Database for DbEnv {
@@ -163,12 +170,16 @@ impl Database for DbEnv {
163170

164171
#[tracing::instrument(level = "trace", name = "db_txn_ro_create", skip_all)]
165172
fn tx(&self) -> Result<Self::Tx, DatabaseError> {
166-
Ok(Tx::new(self.inner.env.begin_ro_txn().map_err(DatabaseError::CreateROTx)?))
173+
let tx = self.inner.env.begin_ro_txn().map_err(DatabaseError::CreateROTx)?;
174+
self.inner.metrics.record_ro_tx_create();
175+
Ok(Tx::new(tx, self.inner.metrics.clone()))
167176
}
168177

169178
#[tracing::instrument(level = "trace", name = "db_txn_rw_create", skip_all)]
170179
fn tx_mut(&self) -> Result<Self::TxMut, DatabaseError> {
171-
Ok(Tx::new(self.inner.env.begin_rw_txn().map_err(DatabaseError::CreateRWTx)?))
180+
let tx = self.inner.env.begin_rw_txn().map_err(DatabaseError::CreateRWTx)?;
181+
self.inner.metrics.record_rw_tx_create();
182+
Ok(Tx::new(tx, self.inner.metrics.clone()))
172183
}
173184

174185
fn stats(&self) -> Result<Self::Stats, DatabaseError> {
@@ -195,28 +206,37 @@ impl katana_metrics::Report for DbEnv {
195206
let mut pgsize = 0;
196207

197208
for (table, stat) in stats.table_stats() {
198-
gauge!("db.table_size", vec![Label::new("table", *table)])
209+
gauge!("db.table_size", vec![::metrics::Label::new("table", *table)])
199210
.set(stat.total_size() as f64);
200211

201212
gauge!(
202213
"db.table_pages",
203-
vec![Label::new("table", *table), Label::new("type", "leaf")]
214+
vec![
215+
::metrics::Label::new("table", *table),
216+
::metrics::Label::new("type", "leaf")
217+
]
204218
)
205219
.set(stat.leaf_pages() as f64);
206220

207221
gauge!(
208222
"db.table_pages",
209-
vec![Label::new("table", *table), Label::new("type", "branch")]
223+
vec![
224+
::metrics::Label::new("table", *table),
225+
::metrics::Label::new("type", "branch")
226+
]
210227
)
211228
.set(stat.branch_pages() as f64);
212229

213230
gauge!(
214231
"db.table_pages",
215-
vec![Label::new("table", *table), Label::new("type", "overflow")]
232+
vec![
233+
::metrics::Label::new("table", *table),
234+
::metrics::Label::new("type", "overflow")
235+
]
216236
)
217237
.set(stat.overflow_pages() as f64);
218238

219-
gauge!("db.table_entries", vec![Label::new("table", *table)])
239+
gauge!("db.table_entries", vec![::metrics::Label::new("table", *table)])
220240
.set(stat.entries() as f64);
221241

222242
if pgsize == 0 {

0 commit comments

Comments
 (0)