Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 73 additions & 18 deletions lib/dictBuilder/zdict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,30 +90,30 @@ pub unsafe extern "C" fn ZDICT_getDictID(
if MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY {
return 0;
}
MEM_readLE32((dictBuffer as *const core::ffi::c_char).add(4) as *const core::ffi::c_void)
MEM_readLE32(dictBuffer.byte_add(4))
}

// NOTE(review): this span is a rendered diff hunk without +/- markers, so the removed
// (malloc/free) body and the added (Box-based) body of the function appear back to back.
// Which lines are "removed" / "added" below is derived from the hunk header's line counts.
//
// Both versions return `Error::dictionary_corrupted` as an error code when `dictSize <= 8` or
// the first little-endian u32 of `dictBuffer` is not `ZSTD_MAGIC_DICTIONARY`; otherwise they
// return whatever `ZSTD_loadCEntropy` returns for the dictionary.
#[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(ZDICT_getDictHeaderSize))]
pub unsafe extern "C" fn ZDICT_getDictHeaderSize(
dictBuffer: *const core::ffi::c_void,
dictSize: size_t,
) -> size_t {
// (removed) result variable of the old body; the new body returns the call expression directly.
let mut headerSize: size_t = 0;
// Reject buffers of at most 8 bytes and buffers that do not start with the dictionary magic.
if dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY {
return Error::dictionary_corrupted.to_error_code();
}
// (removed, through the `headerSize` line) old body: both scratch buffers come from `malloc`,
// a null pointer from either allocation yields the `memory_allocation` error code, and both
// pointers are passed to `free` before returning.
let bs = malloc(::core::mem::size_of::<ZSTD_compressedBlockState_t>())
as *mut ZSTD_compressedBlockState_t;
let wksp = malloc(HUF_WORKSPACE_SIZE as size_t) as *mut u32;
if bs.is_null() || wksp.is_null() {
headerSize = Error::memory_allocation.to_error_code();
} else {
ZSTD_reset_compressedBlockState(bs);
headerSize = ZSTD_loadCEntropy(bs, wksp as *mut core::ffi::c_void, dictBuffer, dictSize);
}
free(bs as *mut core::ffi::c_void);
free(wksp as *mut core::ffi::c_void);
headerSize

// (added, through the `ZSTD_loadCEntropy` call) new body: the scratch buffers are owned boxes,
// so there is no explicit null check and no explicit `free`.
// FIXME: in 1.92 we can use https://doc.rust-lang.org/std/boxed/struct.Box.html#method.new_zeroed
// The block state is allocated uninitialized and handed to `ZSTD_reset_compressedBlockState`
// before it is used.
let mut bs = Box::<ZSTD_compressedBlockState_t>::new_uninit();
unsafe { ZSTD_reset_compressedBlockState(bs.as_mut_ptr()) };

// The slice length is counted in `u32` elements, hence the division by 4; the removed body
// requested `HUF_WORKSPACE_SIZE` bytes from `malloc` for the same workspace.
let mut wksp = Box::<[u32]>::new_uninit_slice(HUF_WORKSPACE_SIZE as size_t / 4);

// The value returned by `ZSTD_loadCEntropy` is the function's result.
ZSTD_loadCEntropy(
bs.as_mut_ptr(),
wksp.as_mut_ptr() as *mut core::ffi::c_void,
dictBuffer,
dictSize,
)
}

unsafe fn ZDICT_count(
Expand Down Expand Up @@ -142,6 +142,7 @@ unsafe fn ZDICT_initDictItem(d: *mut dictItem) {
(*d).length = 0;
(*d).savings = -(1 as core::ffi::c_int) as u32;
}

const LLIMIT: core::ffi::c_int = 64;
const MINMATCHLENGTH: core::ffi::c_int = 7;
unsafe fn ZDICT_analyzePos(
Expand Down Expand Up @@ -715,15 +716,15 @@ unsafe fn ZDICT_trainBuffer_legacy(
result
}

// NOTE(review): rendered diff hunk without +/- markers. Each renamed line is shown twice: the
// old spelling first (`ZDICT_fillNoise`, loop variable `e`), the new spelling second
// (`fill_noise`, loop variable `v`). The logic is the same in both versions.
//
// Fills `buffer` with a deterministic byte sequence: an accumulator seeded with `prime1` is
// multiplied (wrapping) by `prime2` once per byte, and each byte receives bits 21..=28 of the
// updated accumulator.
fn ZDICT_fillNoise(buffer: &mut [u8]) {
fn fill_noise(buffer: &mut [u8]) {
const prime1: u32 = 2654435761;
const prime2: u32 = 2246822519;

// Running state; advanced once per output byte.
let mut acc = prime1;

for e in buffer.iter_mut() {
for v in buffer.iter_mut() {
acc = acc.wrapping_mul(prime2);
// `as u8` keeps only the low 8 bits of the shifted accumulator.
*e = (acc >> 21) as u8;
*v = (acc >> 21) as u8;
}
}

Expand Down Expand Up @@ -1575,7 +1576,7 @@ pub unsafe extern "C" fn ZDICT_trainFromBuffer_legacy(
}
let mut new_buf = vec![0u8; sBuffSize.wrapping_add(NOISELENGTH as size_t)];
core::ptr::copy_nonoverlapping(samplesBuffer.cast::<u8>(), new_buf.as_mut_ptr(), sBuffSize);
ZDICT_fillNoise(&mut new_buf[sBuffSize..]);
fill_noise(&mut new_buf[sBuffSize..]);
ZDICT_trainFromBuffer_unsafe_legacy(
dictBuffer,
dictBufferCapacity,
Expand Down Expand Up @@ -1631,3 +1632,57 @@ pub unsafe extern "C" fn ZDICT_addEntropyTablesFromBuffer(
params,
)
}

#[cfg(test)]
mod test {
    use super::*;

    /// Dictionary fixture embedded at compile time; the tests pin its header size and ID.
    const DICT: &[u8] = include_bytes!("../../test-libzstd-rs-sys/test-data/test-dict.dat");

    /// Buffers that are not dictionaries: an empty one, and eight zero bytes (no magic number).
    const INVALID_DICTS: [&[u8]; 2] = [&[], &[0; 8]];

    /// Invalid buffers are reported as corrupted; the fixture yields a header size of 133.
    #[test]
    fn test_get_dict_header_size() {
        for rejected in INVALID_DICTS {
            let code = unsafe { ZDICT_getDictHeaderSize(rejected.as_ptr().cast(), rejected.len()) };
            assert_eq!(
                Error::from_error_code(code),
                Some(Error::dictionary_corrupted)
            );
        }

        let header_size = unsafe { ZDICT_getDictHeaderSize(DICT.as_ptr().cast(), DICT.len()) };
        // Any error code at this point fails the test and prints the decoded error.
        if let Some(err) = Error::from_error_code(header_size) {
            panic!("{:?}", err);
        }
        assert_eq!(header_size, 133);
    }

    /// Invalid buffers yield an ID of 0; the fixture yields its known ID.
    #[test]
    fn test_get_dict_id() {
        for rejected in INVALID_DICTS {
            let id = unsafe { ZDICT_getDictID(rejected.as_ptr().cast(), rejected.len()) };
            assert_eq!(id, 0);
        }

        let id = unsafe { ZDICT_getDictID(DICT.as_ptr().cast(), DICT.len()) };
        assert_eq!(id, 1877512422);
    }

    /// The noise generator is deterministic: the first 16 bytes are pinned here.
    #[test]
    fn test_fill_noise() {
        const EXPECTED: [u8; 16] = [
            226, 51, 247, 105, 221, 225, 137, 112, 5, 188, 15, 79, 183, 243, 110, 209,
        ];

        let mut noise = [0u8; 16];
        fill_noise(&mut noise);
        assert_eq!(noise, EXPECTED);
    }
}
Binary file added test-libzstd-rs-sys/test-data/test-dict.dat
Binary file not shown.