Skip to content

Commit 944b905

Browse files
committed
avoid checkpoint starvation if the DB handles a continuous load of overlapping reads and writes
1 parent f402441 commit 944b905

File tree

1 file changed

+49
-5
lines changed

1 file changed

+49
-5
lines changed

libsql-server/src/connection/connection_manager.rs

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,43 @@ impl ManagedConnectionWalWrapper {
167167
extended_code: 517, // stale read
168168
});
169169
}
170+
// If another connection is about to checkpoint, we had better return immediately.
171+
//
172+
// The reason is that write transactions are upgraded from read transactions in SQLite.
173+
// Due to this, every write transaction needs to hold a SHARED-WAL lock, and if we
174+
// block the write transaction here, we will prevent the checkpoint process from restarting the WAL
175+
// (because it needs to acquire EXCLUSIVE-WAL lock)
176+
//
177+
// So, the scenario is following:
178+
// T0: we have a bunch of SELECT queries which will execute till time T2
179+
// T1: CHECKPOINT process is starting: it holds the CKPT and WRITE locks and attempts to acquire
180+
// EXCLUSIVE-WAL locks one by one in order to check the position of readers. CHECKPOINT will
181+
// use the busy handler and may not acquire a lock on the first attempt.
182+
// T2: CHECKPOINT process was able to check all WAL reader positions (by acquiring the lock or atomically checking the reader position)
183+
// and started to transfer WAL to the DB file
184+
// T3: INSERT query starts executing: it started as a read transaction and held a SHARED-WAL lock, but then it needs to
185+
// upgrade to write transaction through begin_write_txn call
186+
// T4: CHECKPOINT transferred all pages from the WAL to the DB file and needs to check if it can restart the WAL. In order to
187+
// do that it needs to hold all EXCLUSIVE-WAL locks to make sure that all readers use only DB file
188+
//
189+
// In the scenario above, if we park INSERT at time T3, CHECKPOINT will be unable to hold the EXCLUSIVE-WAL
190+
// locks, and so the WAL will not be truncated.
191+
// When the DB is under continuous load with overlapping reads and writes, this problem becomes very noticeable
192+
// as it can defer WAL truncation a lot.
193+
//
194+
// Also, this implementation is more aligned with libSQL/SQLite behaviour, where sqlite3WalBeginWriteTransaction
195+
// immediately aborts with an SQLITE_BUSY error if it can't acquire the WRITE lock (which CHECKPOINT also takes before starting its work)
196+
// and the busy handler (e.g. retries) for writes is invoked by SQLite at an upper layer of request processing.
197+
match *current {
198+
Some(Slot {
199+
id,
200+
state: SlotState::Acquired(SlotType::Checkpoint),
201+
..
202+
}) if id != self.id => {
203+
return Err(rusqlite::ffi::Error::new(rusqlite::ffi::SQLITE_BUSY));
204+
}
205+
_ => {}
206+
}
170207
if current.as_mut().map_or(true, |slot| slot.id != self.id) && !enqueued {
171208
self.manager
172209
.write_queue
@@ -196,7 +233,7 @@ impl ManagedConnectionWalWrapper {
196233
let since_started = slot.started_at.elapsed();
197234
let deadline = slot.started_at + self.manager.txn_timeout_duration;
198235
match slot.state {
199-
SlotState::Acquired => {
236+
SlotState::Acquired(..) => {
200237
if since_started >= self.manager.txn_timeout_duration {
201238
let id = slot.id;
202239
drop(current);
@@ -354,11 +391,17 @@ impl ManagedConnectionWalWrapper {
354391
}
355392
}
356393

394+
#[derive(Copy, Clone, Debug, PartialEq)]
395+
enum SlotType {
396+
WriteTxn,
397+
Checkpoint,
398+
}
399+
357400
#[derive(Copy, Clone, Debug)]
358401
enum SlotState {
359402
Notified,
360403
Acquiring,
361-
Acquired,
404+
Acquired(SlotType),
362405
Failure,
363406
}
364407

@@ -389,7 +432,7 @@ impl WrapWal<InnerWal> for ManagedConnectionWalWrapper {
389432
Ok(_) => {
390433
tracing::debug!("transaction acquired");
391434
let mut lock = self.manager.current.lock();
392-
lock.as_mut().unwrap().state = SlotState::Acquired;
435+
lock.as_mut().unwrap().state = SlotState::Acquired(SlotType::WriteTxn);
393436

394437
Ok(())
395438
}
@@ -424,7 +467,8 @@ impl WrapWal<InnerWal> for ManagedConnectionWalWrapper {
424467
) -> libsql_sys::wal::Result<()> {
425468
let before = Instant::now();
426469
self.acquire()?;
427-
self.manager.current.lock().as_mut().unwrap().state = SlotState::Acquired;
470+
self.manager.current.lock().as_mut().unwrap().state =
471+
SlotState::Acquired(SlotType::Checkpoint);
428472

429473
let mode = if rand::random::<f32>() < 0.1 {
430474
CheckpointMode::Truncate
@@ -476,7 +520,7 @@ impl WrapWal<InnerWal> for ManagedConnectionWalWrapper {
476520
// if the slot acquired the transaction lock
477521
if let Some(Slot {
478522
id,
479-
state: SlotState::Acquired,
523+
state: SlotState::Acquired(..),
480524
..
481525
}) = *current
482526
{

0 commit comments

Comments
 (0)