Skip to content

Commit fe28315

Browse files
committed
Simple LRU cache for reading blocks
Helps a lot with S3 performance
1 parent 844ea19 commit fe28315

File tree

7 files changed

+111
-6
lines changed

7 files changed

+111
-6
lines changed

Cargo.lock

Lines changed: 38 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ globset = "0.4.5"
3333
hex = "0.4.2"
3434
itertools = "0.10"
3535
lazy_static = "1.4.0"
36+
lru = "0.11"
3637
mutants = "0.0.3"
3738
rayon = "1.3.0"
3839
readahead-iterator = "0.1.1"

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
- S3 support! Enable it with `cargo install --features s3`, then e.g. `conserve backup s3://mybucket.example/`.
66

7+
- Performance: A simple cache of retrieved decompressed blocks now speeds up restores, especially on relatively slow storage like S3.
8+
79
- `--debug` now shows on stderr only debug messages from Conserve itself and not
810
from dependencies. All the messages are still recorded to the `--log-json` file
911
if that is given.

src/blockdir.rs

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,16 @@ use std::collections::{HashMap, HashSet};
2525
use std::convert::TryInto;
2626
use std::sync::atomic::Ordering::Relaxed;
2727
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
28-
use std::sync::Arc;
28+
use std::sync::{Arc, RwLock};
2929
use std::time::Instant;
3030

3131
use bytes::Bytes;
32+
use lru::LruCache;
3233
use rayon::prelude::*;
3334
use serde::{Deserialize, Serialize};
3435
#[allow(unused_imports)]
3536
use tracing::{debug, error, info, warn};
37+
use tracing::{instrument, trace};
3638

3739
use crate::backup::BackupStats;
3840
use crate::blockhash::BlockHash;
@@ -46,6 +48,9 @@ const BLOCKDIR_FILE_NAME_LEN: usize = crate::BLAKE_HASH_SIZE_BYTES * 2;
4648
/// Take this many characters from the block hash to form the subdirectory name.
4749
const SUBDIR_NAME_CHARS: usize = 3;
4850

51+
/// Maximum number of uncompressed blocks kept in the in-memory cache; blocks are up to 1MB each, so the cache can hold up to roughly 1GB.
52+
const CACHE_SIZE: usize = 1000;
53+
4954
/// Points to some compressed data inside the block dir.
5055
///
5156
/// Identifiers are: which file contains it, at what (pre-compression) offset,
@@ -69,6 +74,8 @@ pub struct Address {
6974
pub struct BlockDir {
7075
transport: Arc<dyn Transport>,
7176
pub stats: BlockDirStats,
77+
// TODO: There are fancier caches and they might help, but this one works, and Stretto did not work for me.
78+
cache: RwLock<LruCache<BlockHash, Bytes>>,
7279
}
7380

7481
/// Returns the transport-relative subdirectory name.
@@ -87,6 +94,7 @@ impl BlockDir {
8794
BlockDir {
8895
transport,
8996
stats: BlockDirStats::default(),
97+
cache: RwLock::new(LruCache::new(CACHE_SIZE.try_into().unwrap())),
9098
}
9199
}
92100

@@ -111,6 +119,10 @@ impl BlockDir {
111119
return Ok(hash);
112120
}
113121
let compressed = Compressor::new().compress(&block_data)?;
122+
self.cache
123+
.write()
124+
.expect("Lock cache")
125+
.put(hash.clone(), block_data);
114126
let comp_len: u64 = compressed.len().try_into().unwrap();
115127
let hex_hash = hash.to_string();
116128
let relpath = block_relpath(&hash);
@@ -131,6 +143,10 @@ impl BlockDir {
131143
/// So, these are specifically treated as missing, so there's a chance to heal
132144
/// them later.
133145
pub fn contains(&self, hash: &BlockHash) -> Result<bool> {
146+
if self.cache.read().expect("Lock cache").contains(hash) {
147+
self.stats.cache_hit.fetch_add(1, Relaxed);
148+
return Ok(true);
149+
}
134150
match self.transport.metadata(&block_relpath(hash)) {
135151
Err(err) if err.is_not_found() => Ok(false),
136152
Err(err) => {
@@ -165,10 +181,13 @@ impl BlockDir {
165181
/// Return the entire contents of the block.
166182
///
167183
/// Checks that the hash is correct with the contents.
184+
#[instrument(skip(self))]
168185
pub fn get_block_content(&self, hash: &BlockHash) -> Result<Bytes> {
169-
// TODO: Reuse decompressor buffer.
170-
// TODO: Most importantly, cache decompressed blocks!
171-
// TODO: Stats for block reads, maybe in the blockdir?
186+
if let Some(hit) = self.cache.write().expect("Lock cache").get(hash) {
187+
self.stats.cache_hit.fetch_add(1, Relaxed);
188+
trace!("Block cache hit");
189+
return Ok(hit.clone());
190+
}
172191
let mut decompressor = Decompressor::new();
173192
let block_relpath = block_relpath(hash);
174193
let compressed_bytes = self.transport.read_file(&block_relpath)?;
@@ -178,6 +197,10 @@ impl BlockDir {
178197
error!(%hash, %actual_hash, %block_relpath, "Block file has wrong hash");
179198
return Err(Error::BlockCorrupt { hash: hash.clone() });
180199
}
200+
self.cache
201+
.write()
202+
.expect("Lock cache")
203+
.put(hash.clone(), decompressed_bytes.clone());
181204
self.stats.read_blocks.fetch_add(1, Relaxed);
182205
self.stats
183206
.read_block_compressed_bytes
@@ -189,6 +212,7 @@ impl BlockDir {
189212
}
190213

191214
pub fn delete_block(&self, hash: &BlockHash) -> Result<()> {
215+
self.cache.write().expect("Lock cache").pop(hash);
192216
self.transport
193217
.remove_file(&block_relpath(hash))
194218
.map_err(Error::from)
@@ -290,6 +314,7 @@ pub struct BlockDirStats {
290314
pub read_blocks: AtomicUsize,
291315
pub read_block_compressed_bytes: AtomicUsize,
292316
pub read_block_uncompressed_bytes: AtomicUsize,
317+
pub cache_hit: AtomicUsize,
293318
}
294319

295320
#[cfg(test)]
@@ -309,6 +334,9 @@ mod test {
309334
.store_or_deduplicate(Bytes::from("stuff"), &mut stats)
310335
.unwrap();
311336
assert!(blockdir.contains(&hash).unwrap());
337+
338+
// Open again to get a fresh cache
339+
let blockdir = BlockDir::open(open_local_transport(tempdir.path()).unwrap());
312340
OpenOptions::new()
313341
.write(true)
314342
.truncate(true)
@@ -317,4 +345,27 @@ mod test {
317345
.expect("Truncate block");
318346
assert!(!blockdir.contains(&hash).unwrap());
319347
}
348+
349+
#[test]
350+
fn cache_hit() {
351+
let tempdir = TempDir::new().unwrap();
352+
let blockdir = BlockDir::open(open_local_transport(tempdir.path()).unwrap());
353+
let mut stats = BackupStats::default();
354+
let content = Bytes::from("stuff");
355+
let hash = blockdir
356+
.store_or_deduplicate(content.clone(), &mut stats)
357+
.unwrap();
358+
assert_eq!(blockdir.stats.cache_hit.load(Relaxed), 0);
359+
360+
assert!(blockdir.contains(&hash).unwrap());
361+
assert_eq!(blockdir.stats.cache_hit.load(Relaxed), 1);
362+
363+
let retrieved = blockdir.get_block_content(&hash).unwrap();
364+
assert_eq!(content, retrieved);
365+
assert_eq!(blockdir.stats.cache_hit.load(Relaxed), 2); // hit against the value written
366+
367+
let retrieved = blockdir.get_block_content(&hash).unwrap();
368+
assert_eq!(content, retrieved);
369+
assert_eq!(blockdir.stats.cache_hit.load(Relaxed), 3); // hit again
370+
}
320371
}

src/restore.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,14 @@ use std::fs::File;
1616
use std::io;
1717
use std::io::Write;
1818
use std::path::{Path, PathBuf};
19+
use std::sync::atomic::Ordering::Relaxed;
1920
use std::{fs, time::Instant};
2021

2122
use filetime::set_file_handle_times;
2223
#[cfg(unix)]
2324
use filetime::set_symlink_file_times;
2425
use time::OffsetDateTime;
25-
use tracing::{error, instrument, warn};
26+
use tracing::{error, instrument, trace, warn};
2627

2728
use crate::band::BandSelectionPolicy;
2829
use crate::io::{directory_is_empty, ensure_dir_exists};
@@ -148,6 +149,7 @@ pub fn restore(
148149
}
149150
stats += apply_deferrals(&deferrals)?;
150151
stats.elapsed = start.elapsed();
152+
stats.block_cache_hits = block_dir.stats.cache_hit.load(Relaxed);
151153
// TODO: Merge in stats from the tree iter and maybe the source tree?
152154
Ok(stats)
153155
}
@@ -244,6 +246,7 @@ fn restore_file(
244246
stats.errors += 1;
245247
}
246248
// TODO: Accumulate more stats.
249+
trace!("Restored file");
247250
Ok(stats)
248251
}
249252

src/stats.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ pub struct RestoreStats {
110110
pub uncompressed_file_bytes: u64,
111111

112112
pub elapsed: Duration,
113+
114+
pub block_cache_hits: usize,
113115
}
114116

115117
impl fmt::Display for RestoreStats {
@@ -122,6 +124,9 @@ impl fmt::Display for RestoreStats {
122124
write_count(w, "unsupported file kind", self.unknown_kind);
123125
writeln!(w).unwrap();
124126

127+
write_count(w, "block cache hits", self.block_cache_hits);
128+
writeln!(w).unwrap();
129+
125130
write_count(w, "errors", self.errors);
126131
write_duration(w, "elapsed", self.elapsed)?;
127132

tests/damage/main.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,14 @@ fn backup_after_damage(
8181
let backup_options = BackupOptions::default();
8282
backup(&archive, source_dir.path(), &backup_options).expect("initial backup");
8383

84+
drop(archive);
8485
action.damage(&location.to_path(&archive_dir));
8586

87+
// Open the archive again to avoid cache effects.
88+
let archive =
89+
Archive::open(conserve::transport::open_local_transport(archive_dir.path()).unwrap())
90+
.expect("open archive");
91+
8692
// A second backup should succeed.
8793
changes.apply(&source_dir);
8894
let backup_stats = backup(&archive, source_dir.path(), &backup_options)

0 commit comments

Comments
 (0)