Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
116 commits
Select commit Hold shift + click to select a range
947828b
feat: add verify_integrity() for full-file checksum verification
polaz Mar 14, 2026
3fa5f56
refactor(verify): stream file checksums and harden public API
polaz Mar 14, 2026
ddf8e9d
fix(verify): use u64 instead of private BlobFileId in public API
polaz Mar 14, 2026
221ea21
feat(verify): implement std::error::Error for IntegrityError
polaz Mar 14, 2026
19f4b66
refactor(verify): add #[must_use] to verify_integrity return value
polaz Mar 14, 2026
48824b9
docs(verify): document public visibility of enum variant fields
polaz Mar 14, 2026
dc5406b
docs(verify): annotate non-obvious hasher and import choices
polaz Mar 14, 2026
cd948eb
refactor(verify): use Xxh3Default for consistency, simplify assertion
polaz Mar 14, 2026
52f4d68
refactor(verify): use Xxh3Default::new() matching ChecksummedWriter
polaz Mar 14, 2026
dbb4ee4
feat: add optimized contains_prefix() method
polaz Mar 14, 2026
453e729
refactor(contains_prefix): accurate doc wording and test corrections
polaz Mar 14, 2026
c25e693
refactor(blob_tree): accurate contains_prefix override note
polaz Mar 14, 2026
273a801
feat: add multi_get() for batch point reads
polaz Mar 14, 2026
1962eb5
perf: seqno-aware seek in data block point reads
polaz Mar 14, 2026
c52ec80
docs(test): clarify seqno snapshot visibility in test comment
polaz Mar 14, 2026
236e4a2
fix: add default impl for multi_get and BlobTree test
polaz Mar 14, 2026
0513f33
docs(data_block): precise seek_to_key_seqno guarantees
polaz Mar 14, 2026
972ea55
feat: add SequenceNumberGenerator trait
polaz Mar 14, 2026
42d2c64
perf(data_block): single cmp in seek_to_key_seqno predicate
polaz Mar 14, 2026
f9bc2f0
fix: clarify multi_get docs and use existing helpers
polaz Mar 14, 2026
cbf88d3
docs(test): describe restart_interval loop coverage
polaz Mar 14, 2026
51ea6cf
refactor: extract BlobTree::resolve_key helper, add unsorted key test
polaz Mar 14, 2026
fdb55f7
fix: remove unused imports and invalid #[must_use] on trait impl
polaz Mar 14, 2026
cf3b0e2
fix: remove unused SequenceNumberGenerator imports from test files
polaz Mar 14, 2026
837a7f3
feat: add inherent method wrappers on SequenceNumberCounter
polaz Mar 14, 2026
0957a0b
docs(seqno): document UnwindSafe supertrait requirement
polaz Mar 14, 2026
a6eebcb
docs(seqno): document trait invariants and add #[must_use] to trait m…
polaz Mar 14, 2026
69e9691
test: add smoke test for custom SequenceNumberGenerator
polaz Mar 14, 2026
36f23e0
fix(seqno): enforce MSB boundary in SequenceNumberCounter
polaz Mar 14, 2026
334edc2
fix(test): enforce MSB invariant in custom generator example
polaz Mar 14, 2026
bc16549
docs(seqno): explain fetch_add vs CAS loop design choice in next()
polaz Mar 14, 2026
b0248bc
refactor: polish docs and consolidate Config impl block
polaz Mar 14, 2026
65ba5f3
docs(seqno): exempt set() from monotonicity invariant
polaz Mar 14, 2026
0f57228
docs(seqno): clarify orphan rule compliance for From impl
polaz Mar 14, 2026
2fc8fe8
docs(seqno): add Panics section to set() wrapper
polaz Mar 14, 2026
3dd7b1c
fix(seqno): use fetch_update instead of fetch_add in next()
polaz Mar 14, 2026
e14a15d
fix(test): use fetch_update in OffsetGenerator::next()
polaz Mar 14, 2026
8b7a67d
docs(config): reflow new_with_generators doc comment
polaz Mar 14, 2026
787f183
fix(version): clamp visible_seqno to stay below reserved MSB range
polaz Mar 14, 2026
de7e24b
refactor(seqno): centralize MAX_SEQNO as public constant
polaz Mar 14, 2026
3320180
fix(seqno): align OffsetGenerator boundaries with SequenceNumberCounter
polaz Mar 14, 2026
1fddda0
perf(data_block): seqno-aware seek for iterator bounds
polaz Mar 15, 2026
2b0b265
refactor(data_block): dedup seek predicate, harden seqno tests
polaz Mar 15, 2026
95ae8ab
fix(docs): add backticks around identifiers in seek_to_key_seqno doc
polaz Mar 15, 2026
d72761d
fix(seqno): resolve clippy warnings in SequenceNumberCounter
polaz Mar 15, 2026
a03b0de
ci: add CoordiNode CI and upstream monitor workflows
polaz Mar 15, 2026
2462f33
docs: add maintained fork notice and support section
polaz Mar 15, 2026
d456379
ci: add dependabot configuration for cargo and actions
polaz Mar 15, 2026
68faa56
ci: add release-plz workflow for automated changelog and releases
polaz Mar 15, 2026
9bf3cf8
ci: split PR checks from full matrix, reduce PR to lint + ubuntu test
polaz Mar 15, 2026
7d275e3
Merge branch 'main' into feat/#187-verify-integrity
polaz Mar 15, 2026
3c7368c
Merge branch 'main' into feat/#138-optimized-containsprefix
polaz Mar 15, 2026
2f119dd
Merge branch 'main' into feat/#237-data-block-seqno-aware-seek
polaz Mar 15, 2026
61c4701
Merge branch 'main' into feat/#96-multi-get
polaz Mar 15, 2026
d9ac1c2
Merge branch 'main' into feat/#174-sequencenumbergenerator-trait
polaz Mar 15, 2026
994436c
fix: resolve all clippy warnings for strict -D warnings CI
polaz Mar 15, 2026
e16fce2
Merge remote-tracking branch 'origin/main' into fix/#2-clippy-warnings
polaz Mar 15, 2026
c21d272
fix(decompress): use runtime validation instead of debug_assert for b…
polaz Mar 15, 2026
cb85fd4
test(block): add corruption test for lz4 byte count validation
polaz Mar 15, 2026
a6a675a
test(vlog): add corruption test for lz4 blob reader byte count valida…
polaz Mar 15, 2026
8f8a154
fix(filter,vlog): guard zero-key division and use checked cast
polaz Mar 15, 2026
5607259
fix(test): use lz4_flex::compress instead of compress_prepend_size
polaz Mar 15, 2026
0376989
docs: add Copilot review instructions with scope and issue-suggestion…
polaz Mar 15, 2026
e967130
Merge remote-tracking branch 'origin/main' into fix/#2-clippy-warnings
polaz Mar 15, 2026
b22f937
ci: add Copilot code review instructions with scope rules
polaz Mar 15, 2026
a677f03
Merge remote-tracking branch 'origin/main' into fix/#2-clippy-warnings
polaz Mar 15, 2026
dbb763a
refactor: upgrade #[allow] to #[expect] with reasons on all suppressions
polaz Mar 15, 2026
5a0575e
docs(table): expand get_highest_seqno docstring, add mixed insert+ing…
polaz Mar 15, 2026
84562fa
Merge remote-tracking branch 'upstream/main'
polaz Mar 15, 2026
3f65399
refactor: compute add_size as usize, remove unreachable wildcard arms
polaz Mar 15, 2026
fc10b94
Merge branch 'main' into fix/#2-clippy-warnings
polaz Mar 15, 2026
364f366
Merge branch 'fix/#2-clippy-warnings' of github.com:structured-world/…
polaz Mar 15, 2026
1a7995a
fix(blob,block): use checked_add for read_len, document size cap scope
polaz Mar 15, 2026
0cee933
Merge pull request #12 from structured-world/fix/#2-clippy-warnings
polaz Mar 15, 2026
d811d02
Merge branch 'main' into docs/#265-seqno-docstring-and-test
polaz Mar 15, 2026
f606019
Merge branch 'main' into feat/#174-sequencenumbergenerator-trait
polaz Mar 15, 2026
0ea0654
Merge branch 'main' into feat/#237-data-block-seqno-aware-seek
polaz Mar 15, 2026
80283a2
Merge branch 'main' into feat/#138-optimized-containsprefix
polaz Mar 15, 2026
72e3ea1
Merge branch 'main' into feat/#187-verify-integrity
polaz Mar 15, 2026
cccff65
Merge pull request #14 from structured-world/docs/#265-seqno-docstrin…
polaz Mar 15, 2026
b90986a
Merge branch 'main' into feat/#174-sequencenumbergenerator-trait
polaz Mar 15, 2026
9047beb
Merge branch 'main' into feat/#96-multi-get
polaz Mar 15, 2026
31fdb57
Merge branch 'main' into feat/#237-data-block-seqno-aware-seek
polaz Mar 15, 2026
b374e6d
Merge branch 'main' into feat/#138-optimized-containsprefix
polaz Mar 15, 2026
c846de3
Merge branch 'main' into feat/#187-verify-integrity
polaz Mar 15, 2026
5e4cbda
refactor(verify): harden IntegrityReport API and improve I/O
polaz Mar 15, 2026
4d71fb1
fix: address review feedback on contains_prefix
polaz Mar 15, 2026
caa3bb0
docs(test): clarify snapshot seqno semantics in multi_get test
polaz Mar 15, 2026
2590eb9
docs(data_block): document why reverse seeks accept but ignore seqno
polaz Mar 15, 2026
9043459
fix(verify): resolve strict clippy warnings
polaz Mar 15, 2026
7e102a3
fix(seqno): clarify docs and remove stale clippy suppression
polaz Mar 15, 2026
4a7d0ae
Merge pull request #6 from structured-world/feat/#138-optimized-conta…
polaz Mar 15, 2026
4c40606
Merge remote-tracking branch 'origin/main' into feat/#237-data-block-…
polaz Mar 15, 2026
5f42fc9
Merge remote-tracking branch 'origin/main' into feat/#174-sequencenum…
polaz Mar 15, 2026
de92363
Merge remote-tracking branch 'origin/main' into feat/#187-verify-inte…
polaz Mar 15, 2026
f164533
chore(merge): merge main into feat/#96-multi-get
polaz Mar 15, 2026
f2fe67a
refactor(seqno): improve test names and use MAX_SEQNO constant
polaz Mar 15, 2026
2e543c5
refactor(verify): remove redundant BufReader, document get() guard
polaz Mar 15, 2026
1f3dd30
Merge pull request #8 from structured-world/feat/#237-data-block-seqn…
polaz Mar 15, 2026
eb8d5ed
chore(merge): merge main into feat/#96-multi-get
polaz Mar 15, 2026
80d489d
Merge remote-tracking branch 'origin/main' into feat/#174-sequencenum…
polaz Mar 15, 2026
cc77932
test(verify): cover Display, Error::source, and blob IoError paths
polaz Mar 15, 2026
dafc12a
docs(seqno): clarify next() pre-increment and get() semantics
polaz Mar 15, 2026
c6fb576
Merge branch 'main' into feat/#174-sequencenumbergenerator-trait
polaz Mar 15, 2026
1522007
Merge branch 'feat/#174-sequencenumbergenerator-trait' of github.com:…
polaz Mar 15, 2026
66a36a6
Merge branch 'main' into feat/#187-verify-integrity
polaz Mar 15, 2026
d21b7ef
Merge pull request #9 from structured-world/feat/#96-multi-get
polaz Mar 15, 2026
44cbc41
Merge remote-tracking branch 'origin/main' into feat/#187-verify-inte…
polaz Mar 15, 2026
164df5d
Merge remote-tracking branch 'origin/feat/#187-verify-integrity' into…
polaz Mar 15, 2026
1b51866
Merge branch 'main' into feat/#174-sequencenumbergenerator-trait
polaz Mar 15, 2026
07956ec
fix(verify): handle EINTR in stream_checksum read loop
polaz Mar 15, 2026
33690b5
Merge branch 'main' into feat/#187-verify-integrity
polaz Mar 15, 2026
5bc4d19
fix(seqno): canonical should_panic syntax, precise trait docs, panic …
polaz Mar 15, 2026
d4f44ee
Merge branch 'feat/#174-sequencenumbergenerator-trait' of github.com:…
polaz Mar 15, 2026
86c2171
Merge pull request #10 from structured-world/feat/#174-sequencenumber…
polaz Mar 15, 2026
40279ed
Merge branch 'main' into feat/#187-verify-integrity
polaz Mar 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ pub mod util;

mod value;
mod value_type;

/// Integrity verification for SST and blob files.
pub mod verify;

mod version;
mod vlog;

Expand Down
192 changes: 192 additions & 0 deletions src/verify.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
// Copyright (c) 2024-present, fjall-rs
// This source code is licensed under both the Apache 2.0 and MIT License
// (found in the LICENSE-* files in the repository)

use crate::{checksum::Checksum, table::TableId};
Comment thread
polaz marked this conversation as resolved.
use std::path::PathBuf;

/// Describes a single integrity error found during verification.
#[derive(Debug)]
Comment thread
polaz marked this conversation as resolved.
#[non_exhaustive]
pub enum IntegrityError {
/// Full-file checksum mismatch for an SST table.
SstFileCorrupted {
/// Table ID
table_id: TableId,
/// Path to the corrupted file
path: PathBuf,
/// Checksum stored in the manifest
expected: Checksum,
/// Checksum computed from disk
got: Checksum,
},

/// Full-file checksum mismatch for a blob file.
BlobFileCorrupted {
/// Blob file ID
blob_file_id: u64,
/// Path to the corrupted file
Comment thread
polaz marked this conversation as resolved.
path: PathBuf,
/// Checksum stored in the manifest
expected: Checksum,
/// Checksum computed from disk
got: Checksum,
},
Comment thread
polaz marked this conversation as resolved.
Comment thread
polaz marked this conversation as resolved.

/// I/O error while reading a file during verification.
IoError {
/// Path to the file that could not be read
path: PathBuf,
/// The underlying I/O error
error: std::io::Error,
Comment thread
polaz marked this conversation as resolved.
},
Comment thread
polaz marked this conversation as resolved.
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

impl std::fmt::Display for IntegrityError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::SstFileCorrupted {
table_id,
path,
expected,
got,
} => write!(
f,
"SST table {table_id} corrupted at {}: expected {expected}, got {got}",
path.display()
),
Self::BlobFileCorrupted {
blob_file_id,
path,
expected,
got,
} => write!(
f,
"blob file {blob_file_id} corrupted at {}: expected {expected}, got {got}",
path.display()
),
Self::IoError { path, error } => {
write!(f, "I/O error reading {}: {}", path.display(), error)
}
}
}
}

/// Outcome of an integrity verification scan.
///
/// Holds how many files of each kind were examined, plus every problem that was
/// recorded along the way.
#[derive(Debug)]
pub struct IntegrityReport {
    /// How many SST table files were checked.
    pub sst_files_checked: usize,

    /// How many blob files were checked.
    pub blob_files_checked: usize,

    /// Every integrity error recorded during the scan.
    pub errors: Vec<IntegrityError>,
}

impl IntegrityReport {
    /// Reports whether the scan finished without recording any error.
    #[must_use]
    pub fn is_ok(&self) -> bool {
        let Self { errors, .. } = self;
        errors.is_empty()
    }

    /// Combined count of checked files: SST tables plus blob files.
    #[must_use]
    pub fn files_checked(&self) -> usize {
        let Self {
            sst_files_checked,
            blob_files_checked,
            ..
        } = self;
        sst_files_checked + blob_files_checked
    }
}

/// Computes a streaming XXH3 128-bit checksum for a file without loading it entirely into memory.
fn stream_checksum(path: &std::path::Path) -> std::io::Result<Checksum> {
use std::io::Read;

let mut reader = std::io::BufReader::new(std::fs::File::open(path)?);
let mut hasher = xxhash_rust::xxh3::Xxh3::default();
Comment thread
polaz marked this conversation as resolved.
Outdated
Comment thread
polaz marked this conversation as resolved.
Outdated
let mut buf = [0u8; 8192];

loop {
let n = reader.read(&mut buf)?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}

Ok(Checksum::from_raw(hasher.digest128()))
}

/// Verifies full-file checksums for all SST and blob files in the given tree.
///
/// Each file's content is read from disk and hashed with XXHash-3 128-bit,
/// then compared against the checksum stored in the version manifest.
///
/// This detects silent bit-rot, partial writes, and other on-disk corruption.
///
/// Per-file errors (e.g., unreadable files, checksum mismatches) are collected
/// into [`IntegrityReport::errors`] — the scan always runs to completion.
pub fn verify_integrity(tree: &impl crate::AbstractTree) -> IntegrityReport {
Comment thread
coderabbitai[bot] marked this conversation as resolved.
let version = tree.current_version();

let mut report = IntegrityReport {
sst_files_checked: 0,
blob_files_checked: 0,
errors: Vec::new(),
};

// Verify all SST table files
for table in version.iter_tables() {
let path = &*table.path;
let expected = table.checksum();

match stream_checksum(path) {
Ok(got) if got != expected => {
report.errors.push(IntegrityError::SstFileCorrupted {
table_id: table.id(),
path: path.to_path_buf(),
expected,
got,
});
}
Ok(_) => {}
Err(e) => {
report.errors.push(IntegrityError::IoError {
path: path.to_path_buf(),
error: e,
});
}
}

report.sst_files_checked += 1;
}

// Verify all blob files
for blob_file in version.blob_files.iter() {
let path = blob_file.path();
let expected = blob_file.checksum();

match stream_checksum(path) {
Ok(got) if got != expected => {
report.errors.push(IntegrityError::BlobFileCorrupted {
blob_file_id: blob_file.id(),
path: path.to_path_buf(),
expected,
got,
});
}
Ok(_) => {}
Err(e) => {
report.errors.push(IntegrityError::IoError {
path: path.to_path_buf(),
error: e,
});
}
}

report.blob_files_checked += 1;
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

report
}
Loading
Loading