|
| 1 | +// Copyright (C) 2013-2020 Blockstack PBC, a public benefit corporation |
| 2 | +// Copyright (C) 2020 Stacks Open Internet Foundation |
| 3 | +// |
| 4 | +// This program is free software: you can redistribute it and/or modify |
| 5 | +// it under the terms of the GNU General Public License as published by |
| 6 | +// the Free Software Foundation, either version 3 of the License, or |
| 7 | +// (at your option) any later version. |
| 8 | +// |
| 9 | +// This program is distributed in the hope that it will be useful, |
| 10 | +// but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | +// GNU General Public License for more details. |
| 13 | +// |
| 14 | +// You should have received a copy of the GNU General Public License |
| 15 | +// along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 16 | + |
| 17 | +use std::backtrace::Backtrace; |
| 18 | +use std::sync::{LazyLock, Mutex}; |
| 19 | +use std::thread; |
| 20 | +use std::time::{Duration, Instant}; |
| 21 | + |
| 22 | +use hashbrown::HashMap; |
| 23 | +use rand::{thread_rng, Rng}; |
| 24 | +use rusqlite::Connection; |
| 25 | + |
| 26 | +use crate::util::sleep_ms; |
| 27 | + |
| 28 | +/// Keep track of DB locks, for deadlock debugging |
| 29 | +/// - **key:** `rusqlite::Connection` debug print |
| 30 | +/// - **value:** Lock holder (thread name + timestamp) |
| 31 | +/// |
| 32 | +/// This uses a `Mutex` inside of `LazyLock` because: |
| 33 | +/// - Using `Mutex` alone, it can't be statically initialized because `HashMap::new()` isn't `const` |
| 34 | +/// - Using `LazyLock` alone doesn't allow interior mutability |
| 35 | +static LOCK_TABLE: LazyLock<Mutex<HashMap<String, String>>> = |
| 36 | + LazyLock::new(|| Mutex::new(HashMap::new())); |
/// Reference point used to generate timestamps for `LOCK_TABLE`
///
/// `Instant` is preferable to `SystemTime` because it is a monotonic clock (e.g. `CLOCK_MONOTONIC`
/// on Linux) and is not affected by NTP adjustments.
///
/// The `Instant` is captured lazily on first access, so the elapsed times derived from it are
/// measured from that first use.
static LOCK_TABLE_TIMER: LazyLock<Instant> = LazyLock::new(Instant::now);
| 40 | + |
| 41 | +/// Call when using an operation which locks a database |
| 42 | +/// Updates `LOCK_TABLE` |
| 43 | +pub fn update_lock_table(conn: &Connection) { |
| 44 | + let timestamp = LOCK_TABLE_TIMER.elapsed().as_millis(); |
| 45 | + // The debug format for `Connection` includes the path |
| 46 | + let k = format!("{conn:?}"); |
| 47 | + let v = format!("{:?}@{timestamp}", thread::current().name()); |
| 48 | + LOCK_TABLE.lock().unwrap().insert(k, v); |
| 49 | +} |
| 50 | + |
| 51 | +/// Called by `rusqlite` if we are waiting too long on a database lock |
| 52 | +/// If called too many times, will assume a deadlock and panic |
| 53 | +pub fn tx_busy_handler(run_count: i32) -> bool { |
| 54 | + const TIMEOUT: Duration = Duration::from_secs(300); |
| 55 | + const AVG_SLEEP_TIME_MS: u64 = 100; |
| 56 | + |
| 57 | + // First, check if this is taking unreasonably long. If so, it's probably a deadlock |
| 58 | + let run_count = run_count.unsigned_abs(); |
| 59 | + let approx_time_elapsed = |
| 60 | + Duration::from_millis(AVG_SLEEP_TIME_MS.saturating_mul(u64::from(run_count))); |
| 61 | + if approx_time_elapsed > TIMEOUT { |
| 62 | + error!("Deadlock detected. Waited {} seconds (estimated) for database lock. Giving up", approx_time_elapsed.as_secs(); |
| 63 | + "run_count" => run_count, |
| 64 | + "backtrace" => ?Backtrace::capture() |
| 65 | + ); |
| 66 | + for (k, v) in LOCK_TABLE.lock().unwrap().iter() { |
| 67 | + error!("Database '{k}' last locked by {v}"); |
| 68 | + } |
| 69 | + panic!("Deadlock in thread {:?}", thread::current().name()); |
| 70 | + } |
| 71 | + |
| 72 | + let mut sleep_time_ms = 2u64.saturating_pow(run_count); |
| 73 | + |
| 74 | + sleep_time_ms = sleep_time_ms.saturating_add(thread_rng().gen_range(0..sleep_time_ms)); |
| 75 | + |
| 76 | + if sleep_time_ms > AVG_SLEEP_TIME_MS { |
| 77 | + let jitter = 10; |
| 78 | + sleep_time_ms = |
| 79 | + thread_rng().gen_range((AVG_SLEEP_TIME_MS - jitter)..(AVG_SLEEP_TIME_MS + jitter)); |
| 80 | + } |
| 81 | + |
| 82 | + let msg = format!("Database is locked; sleeping {sleep_time_ms}ms and trying again"); |
| 83 | + if run_count > 10 && run_count % 10 == 0 { |
| 84 | + warn!("{msg}"; |
| 85 | + "run_count" => run_count, |
| 86 | + "backtrace" => ?Backtrace::capture() |
| 87 | + ); |
| 88 | + } else { |
| 89 | + debug!("{msg}"); |
| 90 | + } |
| 91 | + |
| 92 | + sleep_ms(sleep_time_ms); |
| 93 | + true |
| 94 | +} |
0 commit comments