Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .github/workflows/kani.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: Kani

on:
push:
branches: ["develop"]
pull_request: {}
workflow_dispatch: {}

permissions:
actions: read
contents: read

jobs:
kani:
name: "kani"
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
- name: Run Kani verification
uses: model-checking/kani-github-action@v1.1
with:
args: --output-format terse
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ name = "fsst"
[lints.rust]
warnings = "deny"
missing_docs = "deny"
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] }

[lints.clippy]
all = { level = "deny", priority = -1 }
Expand Down
37 changes: 37 additions & 0 deletions src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -986,3 +986,40 @@ mod test {
);
}
}

#[cfg(kani)]
mod kani_proofs {
use super::*;

/// Tier 2: Verify bitmap set/is_set consistency.
///
/// After calling `set(index)` on a bitmap, `is_set(index)` must return true.
/// This proof verifies the bit manipulation in the bitmap implementation
/// correctly tracks which indices have been set.
#[kani::proof]
fn proof_bitmap_set_is_set() {
let index: usize = kani::any();
kani::assume(index <= FSST_CODE_MASK as usize);

let mut bitmap = CodesBitmap::default();
bitmap.set(index);
assert!(bitmap.is_set(index));
}

/// Tier 2: Verify bitmap index calculation stays in bounds.
///
/// The bitmap uses `index >> 6` to select which u64 in the array to modify,
/// and `index % 64` to select which bit. This proof verifies that for any
/// valid index (0..=511), the array access is always in bounds.
#[kani::proof]
fn proof_bitmap_index_bounds() {
let index: usize = kani::any();
kani::assume(index <= FSST_CODE_MASK as usize);

let map = index >> 6;
assert!(map < 8);

let bit = index % 64;
assert!(bit < 64);
}
}
110 changes: 109 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,7 @@ pub(crate) fn advance_8byte_word(word: u64, bytes: usize) -> u64 {
fn validate_symbol_order(symbol_lens: &[u8]) {
// Ensure that the symbol table is ordered by length, 23456781
let mut expected = 2;
for (idx, &len) in symbol_lens.iter().enumerate() {
for (_idx, &len) in symbol_lens.iter().enumerate() {
if expected == 1 {
assert_eq!(
len, 1,
Expand Down Expand Up @@ -1007,3 +1007,111 @@ mod test {
assert_eq!(built_symbols, symbols);
}
}

#[cfg(kani)]
mod kani_proofs {
use super::*;

/// Tier 1: Verify escape code detection bit manipulation correctness.
///
/// The escape mask formula detects bytes equal to 0xFF (ESCAPE_CODE) in an 8-byte word.
/// This proof verifies that for any input word, the high bit in each byte position
/// of the escape_mask is set if and only if that byte equals ESCAPE_CODE (0xFF).
#[kani::proof]
fn proof_escape_mask_correctness() {
let next_block: u64 = kani::any();
let escape_mask = (next_block & 0x8080808080808080)
& ((((!next_block) & 0x7F7F7F7F7F7F7F7F) + 0x7F7F7F7F7F7F7F7F) ^ 0x8080808080808080);

// Verify: high bit set in each byte position iff that byte == 0xFF
for i in 0..8u32 {
let byte = ((next_block >> (i * 8)) & 0xFF) as u8;
let mask_bit = (escape_mask >> (i * 8 + 7)) & 1;
assert!((byte == ESCAPE_CODE) == (mask_bit == 1));
}
}

/// Tier 1: Verify symbol length calculation bounds.
///
/// Symbol::len() must always return a value in [1, 8] for any possible u64 representation.
/// The special case of all-zeros (0x0000000000000000) should return 1, not 0.
#[kani::proof]
fn proof_symbol_len_bounds() {
let value: u64 = kani::any();
let len = Symbol(value).len();
assert!(len >= 1 && len <= 8);
}

/// Tier 1: Verify symbol concatenation produces valid results.
///
/// When two symbols are concatenated, the resulting symbol must have a valid length
/// (between the length of the first symbol and 8).
#[kani::proof]
fn proof_symbol_concat_valid() {
let a: u64 = kani::any();
let b: u64 = kani::any();
let sym_a = Symbol(a);
let sym_b = Symbol(b);
kani::assume(sym_a.len() + sym_b.len() <= 8);

let result = sym_a.concat(sym_b);
assert!(result.len() >= sym_a.len());
assert!(result.len() <= 8);
}

/// Tier 1: Verify word advancement has no undefined behavior from oversized shifts.
///
/// Shifting a u64 by 64 or more bits is undefined behavior in Rust/LLVM.
/// This proof verifies that advance_8byte_word correctly handles the edge case
/// where bytes == 8, which would otherwise require a 64-bit shift.
#[kani::proof]
fn proof_advance_word_no_ub() {
let word: u64 = kani::any();
let bytes: usize = kani::any();
kani::assume(bytes <= 8);

let result = advance_8byte_word(word, bytes);
if bytes == 8 {
assert!(result == 0);
}
}

/// Tier 2: Verify code bit-packing produces correct field extraction.
///
/// The Code type packs a code value (8 bits), an "is symbol" flag (1 bit),
/// and a length (4 bits) into a u16. This proof verifies that these fields
/// can be correctly extracted after construction.
#[kani::proof]
fn proof_code_field_extraction() {
let code_val: u8 = kani::any();
let len: usize = kani::any();
kani::assume(len >= 1 && len <= 8);
kani::assume(code_val < 255); // Code 255 is reserved for ESCAPE_CODE

let code = Code::new_symbol(code_val, len);
assert!(code.code() == code_val);
assert!(code.len() == len as u16);
}

/// Tier 3: Bounded round-trip verification for small inputs.
///
/// This proof verifies that for an empty compressor (no trained symbols),
/// compression followed by decompression produces the original input.
/// With an empty symbol table, all bytes are escaped, making this a
/// tractable verification target.
#[kani::proof]
#[kani::unwind(17)]
fn proof_roundtrip_empty_compressor() {
let input: [u8; 4] = kani::any();
let compressor = CompressorBuilder::new().build();
let decompressor = compressor.decompressor();

let compressed = compressor.compress(&input);
let decompressed = decompressor.decompress(&compressed);

assert!(decompressed.len() == input.len());
for i in 0..input.len() {
assert!(decompressed[i] == input[i]);
}
}
}
18 changes: 18 additions & 0 deletions src/lossy_pht.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,21 @@ impl Default for LossyPHT {
Self::new()
}
}

#[cfg(kani)]
mod kani_proofs {
use super::*;

/// Tier 2: Verify hash slot calculation always produces in-bounds index.
///
/// The hash table lookup uses `fsst_hash(prefix_3bytes) & (HASH_TABLE_SIZE - 1)`
/// to compute the slot index. This proof verifies that for any input word,
/// the computed slot is always less than HASH_TABLE_SIZE.
#[kani::proof]
fn proof_hash_slot_bounds() {
let word: u64 = kani::any();
let prefix_3bytes = word & 0xFF_FF_FF;
let slot = fsst_hash(prefix_3bytes) as usize & (HASH_TABLE_SIZE - 1);
assert!(slot < HASH_TABLE_SIZE);
}
}
Loading