Skip to content

Commit 701a8cf

Browse files
authored
Merge pull request #1952 from tursodatabase/sqld-metrics
add few simple metrics to sqld
2 parents 224b57e + c921508 commit 701a8cf

File tree

14 files changed

+93
-23
lines changed

14 files changed

+93
-23
lines changed

libsql-server/src/config.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ pub struct DbConfig {
101101
pub snapshot_at_shutdown: bool,
102102
pub encryption_config: Option<EncryptionConfig>,
103103
pub max_concurrent_requests: u64,
104+
pub disable_intelligent_throttling: bool,
105+
pub connection_creation_timeout: Option<Duration>,
104106
}
105107

106108
impl Default for DbConfig {
@@ -119,6 +121,8 @@ impl Default for DbConfig {
119121
snapshot_at_shutdown: false,
120122
encryption_config: None,
121123
max_concurrent_requests: 128,
124+
disable_intelligent_throttling: false,
125+
connection_creation_timeout: None,
122126
}
123127
}
124128
}

libsql-server/src/connection/mod.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@ use crate::error::Error;
1313
use crate::http::user::timing::sample_time;
1414
use crate::metrics::{
1515
CONCURRENT_CONNECTIONS_COUNT, CONNECTION_ALIVE_DURATION, CONNECTION_CREATE_TIME,
16+
TOTAL_RESPONSE_SIZE_HIST,
1617
};
1718
use crate::namespace::meta_store::MetaStore;
1819
use crate::namespace::NamespaceName;
1920
use crate::query::{Params, Query};
2021
use crate::query_analysis::Statement;
21-
use crate::query_result_builder::{IgnoreResult, QueryResultBuilder};
22+
use crate::query_result_builder::{IgnoreResult, QueryResultBuilder, TOTAL_RESPONSE_SIZE};
2223
use crate::replication::FrameNo;
2324
use crate::Result;
2425

@@ -205,6 +206,7 @@ pub trait MakeConnection: Send + Sync + 'static {
205206
timeout: Option<Duration>,
206207
max_total_response_size: u64,
207208
max_concurrent_requests: u64,
209+
disable_intelligent_throttling: bool,
208210
) -> MakeThrottledConnection<Self>
209211
where
210212
Self: Sized,
@@ -215,6 +217,7 @@ pub trait MakeConnection: Send + Sync + 'static {
215217
timeout,
216218
max_total_response_size,
217219
max_concurrent_requests,
220+
disable_intelligent_throttling,
218221
)
219222
}
220223

@@ -280,6 +283,7 @@ pub struct MakeThrottledConnection<F> {
280283
max_total_response_size: u64,
281284
waiters: AtomicUsize,
282285
max_concurrent_requests: u64,
286+
disable_intelligent_throttling: bool,
283287
}
284288

285289
impl<F> MakeThrottledConnection<F> {
@@ -289,6 +293,7 @@ impl<F> MakeThrottledConnection<F> {
289293
timeout: Option<Duration>,
290294
max_total_response_size: u64,
291295
max_concurrent_requests: u64,
296+
disable_intelligent_throttling: bool,
292297
) -> Self {
293298
Self {
294299
semaphore,
@@ -297,12 +302,16 @@ impl<F> MakeThrottledConnection<F> {
297302
max_total_response_size,
298303
waiters: AtomicUsize::new(0),
299304
max_concurrent_requests,
305+
disable_intelligent_throttling,
300306
}
301307
}
302308

303309
// How many units should be acquired from the semaphore,
304310
// depending on current memory pressure.
305311
fn units_to_take(&self) -> u32 {
312+
if self.disable_intelligent_throttling {
313+
return 1;
314+
}
306315
let total_response_size = crate::query_result_builder::TOTAL_RESPONSE_SIZE
307316
.load(std::sync::atomic::Ordering::Relaxed) as u64;
308317
if total_response_size * 2 > self.max_total_response_size {
@@ -352,6 +361,8 @@ impl<F: MakeConnection> MakeConnection for MakeThrottledConnection<F> {
352361
"Available semaphore units: {}",
353362
self.semaphore.available_permits()
354363
);
364+
TOTAL_RESPONSE_SIZE_HIST
365+
.record(TOTAL_RESPONSE_SIZE.load(std::sync::atomic::Ordering::Relaxed) as f64);
355366
let units = self.units_to_take();
356367
let waiters_guard = WaitersGuard::new(&self.waiters);
357368
if (waiters_guard.waiters.load(Ordering::Relaxed) as u64) >= self.max_concurrent_requests {
@@ -519,6 +530,7 @@ pub mod test {
519530
Some(Duration::from_millis(100)),
520531
u64::MAX,
521532
u64::MAX,
533+
false,
522534
);
523535

524536
let mut conns = Vec::with_capacity(10);

libsql-server/src/hrana/http/stream.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use tokio::time::{Duration, Instant};
1212

1313
use crate::connection::MakeConnection;
1414
use crate::database::Connection;
15+
use crate::metrics::STREAM_HANDLES_COUNT;
1516

1617
use super::super::ProtocolError;
1718
use super::Server;
@@ -169,6 +170,8 @@ pub async fn acquire<'srv>(
169170
baton_seq: rand::random(),
170171
});
171172
state.handles.insert(stream.stream_id, Handle::Acquired);
173+
STREAM_HANDLES_COUNT.increment(1.0);
174+
172175
tracing::debug!(
173176
"Stream {} was created with baton seq {}",
174177
stream.stream_id,
@@ -253,6 +256,7 @@ impl<'srv> Drop for Guard<'srv> {
253256
tracing::debug!("Stream {stream_id} was released for further use");
254257
} else {
255258
tracing::debug!("Stream {stream_id} was closed");
259+
STREAM_HANDLES_COUNT.decrement(1.0);
256260
}
257261
}
258262
}
@@ -374,6 +378,7 @@ fn pump_expire(state: &mut ServerStreamState, cx: &mut task::Context) {
374378

375379
match state.handles.get_mut(&stream_id) {
376380
Some(handle @ Handle::Available(_)) => {
381+
STREAM_HANDLES_COUNT.decrement(1.0);
377382
*handle = Handle::Expired;
378383
}
379384
_ => continue,

libsql-server/src/hrana/stmt.rs

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use anyhow::{anyhow, bail, Result};
2+
use metrics::counter;
23
use std::collections::HashMap;
34

45
use super::result_builder::SingleStatementBuilder;
@@ -206,39 +207,46 @@ fn catch_stmt_error(sqld_error: SqldError) -> anyhow::Error {
206207
}
207208

208209
pub fn stmt_error_from_sqld_error(sqld_error: SqldError) -> Result<StmtError, SqldError> {
209-
Ok(match sqld_error {
210-
SqldError::LibSqlInvalidQueryParams(source) => StmtError::ArgsInvalid { source },
211-
SqldError::LibSqlTxTimeout => StmtError::TransactionTimeout,
212-
SqldError::LibSqlTxBusy => StmtError::TransactionBusy,
210+
let result = match sqld_error {
211+
SqldError::LibSqlInvalidQueryParams(source) => Ok(StmtError::ArgsInvalid { source }),
212+
SqldError::LibSqlTxTimeout => Ok(StmtError::TransactionTimeout),
213+
SqldError::LibSqlTxBusy => Ok(StmtError::TransactionBusy),
213214
SqldError::BuilderError(QueryResultBuilderError::ResponseTooLarge(_)) => {
214-
StmtError::ResponseTooLarge
215+
Ok(StmtError::ResponseTooLarge)
215216
}
216-
SqldError::Blocked(reason) => StmtError::Blocked { reason },
217-
SqldError::RpcQueryError(e) => StmtError::Proxy(e.message),
217+
SqldError::Blocked(reason) => Ok(StmtError::Blocked { reason }),
218+
SqldError::RpcQueryError(e) => Ok(StmtError::Proxy(e.message)),
218219
SqldError::RusqliteError(rusqlite_error)
219220
| SqldError::RusqliteErrorExtended(rusqlite_error, _) => match rusqlite_error {
220-
rusqlite::Error::SqliteFailure(sqlite_error, Some(message)) => StmtError::SqliteError {
221-
source: sqlite_error,
222-
message,
223-
},
224-
rusqlite::Error::SqliteFailure(sqlite_error, None) => StmtError::SqliteError {
221+
rusqlite::Error::SqliteFailure(sqlite_error, Some(message)) => {
222+
Ok(StmtError::SqliteError {
223+
source: sqlite_error,
224+
message,
225+
})
226+
}
227+
rusqlite::Error::SqliteFailure(sqlite_error, None) => Ok(StmtError::SqliteError {
225228
message: sqlite_error.to_string(),
226229
source: sqlite_error,
227-
},
230+
}),
228231
rusqlite::Error::SqlInputError {
229232
error: sqlite_error,
230233
msg: message,
231234
offset,
232235
..
233-
} => StmtError::SqlInputError {
236+
} => Ok(StmtError::SqlInputError {
234237
source: sqlite_error,
235238
message,
236239
offset,
237-
},
238-
rusqlite_error => return Err(SqldError::RusqliteError(rusqlite_error)),
240+
}),
241+
rusqlite_error => Err(SqldError::RusqliteError(rusqlite_error)),
239242
},
240-
sqld_error => return Err(sqld_error),
241-
})
243+
sqld_error => Err(sqld_error),
244+
};
245+
246+
let code = result.as_ref().map(|x| x.code()).unwrap_or("UKNOWN");
247+
counter!("libsql_server_hrana_step_errors", 1, "code" => code);
248+
249+
result
242250
}
243251

244252
pub fn proto_error_from_stmt_error(error: &StmtError) -> hrana::proto::Error {

libsql-server/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,8 @@ where
634634
max_concurrent_connections: Arc::new(Semaphore::new(self.max_concurrent_connections)),
635635
max_concurrent_requests: self.db_config.max_concurrent_requests,
636636
encryption_config: self.db_config.encryption_config.clone(),
637+
disable_intelligent_throttling: self.db_config.disable_intelligent_throttling,
638+
connection_creation_timeout: self.db_config.connection_creation_timeout,
637639
};
638640

639641
let (metastore_conn_maker, meta_store_wal_manager) =

libsql-server/src/main.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,13 @@ struct Cli {
248248
#[clap(long, default_value = "128", env = "SQLD_MAX_CONCURRENT_REQUESTS")]
249249
max_concurrent_requests: u64,
250250

251+
// Disable the throttling logic that adjusts concurrency limits based on memory-pressure conditions
252+
#[clap(long, env = "SQLD_DISABLE_INTELLIGENT_THROTTLING")]
253+
disable_intelligent_throttling: bool,
254+
255+
#[clap(long, env = "SQLD_CONNECTION_CREATION_TIMEOUT_SEC")]
256+
connection_creation_timeout_sec: Option<u64>,
257+
251258
/// Allow meta store to recover config from filesystem from older version, if meta store is
252259
/// empty on startup
253260
#[clap(long, env = "SQLD_ALLOW_METASTORE_RECOVERY")]
@@ -421,6 +428,10 @@ fn make_db_config(config: &Cli) -> anyhow::Result<DbConfig> {
421428
snapshot_at_shutdown: config.snapshot_at_shutdown,
422429
encryption_config: encryption_config.clone(),
423430
max_concurrent_requests: config.max_concurrent_requests,
431+
disable_intelligent_throttling: config.disable_intelligent_throttling,
432+
connection_creation_timeout: config
433+
.connection_creation_timeout_sec
434+
.map(|x| Duration::from_secs(x)),
424435
})
425436
}
426437

libsql-server/src/metrics.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,16 @@ pub static CONCURRENT_CONNECTIONS_COUNT: Lazy<Gauge> = Lazy::new(|| {
2727
describe_gauge!(NAME, "number of concurrent connections");
2828
register_gauge!(NAME)
2929
});
30+
pub static TOTAL_RESPONSE_SIZE_HIST: Lazy<Histogram> = Lazy::new(|| {
31+
const NAME: &str = "libsql_server_total_response_size_before_lock";
32+
describe_histogram!(NAME, "total response size value before connection lock");
33+
register_histogram!(NAME)
34+
});
35+
pub static STREAM_HANDLES_COUNT: Lazy<Gauge> = Lazy::new(|| {
36+
const NAME: &str = "libsql_server_stream_handles";
37+
describe_gauge!(NAME, "amount of in-memory stream handles");
38+
register_gauge!(NAME)
39+
});
3040
pub static NAMESPACE_LOAD_LATENCY: Lazy<Histogram> = Lazy::new(|| {
3141
const NAME: &str = "libsql_server_namespace_load_latency";
3242
describe_histogram!(NAME, "latency is us when loading a namespace");

libsql-server/src/namespace/configurator/helpers.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,12 @@ pub(super) async fn make_primary_connection_maker(
180180
.await?
181181
.throttled(
182182
base_config.max_concurrent_connections.clone(),
183-
Some(DB_CREATE_TIMEOUT),
183+
base_config
184+
.connection_creation_timeout
185+
.or(Some(DB_CREATE_TIMEOUT)),
184186
base_config.max_total_response_size,
185187
base_config.max_concurrent_requests,
188+
base_config.disable_intelligent_throttling,
186189
),
187190
);
188191

libsql-server/src/namespace/configurator/libsql_primary.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,12 @@ pub(super) async fn libsql_primary_common(
118118
}
119119
.throttled(
120120
base_config.max_concurrent_connections.clone(),
121-
Some(DB_CREATE_TIMEOUT),
121+
base_config
122+
.connection_creation_timeout
123+
.or(Some(DB_CREATE_TIMEOUT)),
122124
base_config.max_total_response_size,
123125
base_config.max_concurrent_requests,
126+
base_config.disable_intelligent_throttling,
124127
);
125128
let connection_maker = Arc::new(connection_maker);
126129

libsql-server/src/namespace/configurator/libsql_replica.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,9 +170,12 @@ impl ConfigureNamespace for LibsqlReplicaConfigurator {
170170
)
171171
.throttled(
172172
self.base.max_concurrent_connections.clone(),
173-
Some(DB_CREATE_TIMEOUT),
173+
self.base
174+
.connection_creation_timeout
175+
.or(Some(DB_CREATE_TIMEOUT)),
174176
self.base.max_total_response_size,
175177
self.base.max_concurrent_requests,
178+
self.base.disable_intelligent_throttling,
176179
),
177180
);
178181

0 commit comments

Comments
 (0)