diff --git a/lib/dictBuilder/zdict.rs b/lib/dictBuilder/zdict.rs index b88f48a9..6f9107dd 100644 --- a/lib/dictBuilder/zdict.rs +++ b/lib/dictBuilder/zdict.rs @@ -5,7 +5,7 @@ use libc::{free, malloc, memcpy, size_t}; use crate::lib::common::bits::{ZSTD_NbCommonBytes, ZSTD_highbit32}; use crate::lib::common::error_private::{ERR_getErrorName, ERR_isError, Error}; use crate::lib::common::huf::{HUF_CElt, HUF_CTABLE_WORKSPACE_SIZE_U32, HUF_WORKSPACE_SIZE}; -use crate::lib::common::mem::{MEM_read64, MEM_readLE32, MEM_readST, MEM_writeLE32}; +use crate::lib::common::mem::{MEM_readLE32, MEM_readST, MEM_writeLE32}; use crate::lib::common::xxhash::ZSTD_XXH64; use crate::lib::common::zstd_internal::{ repStartValue, LLFSELog, MLFSELog, MaxLL, MaxML, OffFSELog, ZSTD_REP_NUM, @@ -45,7 +45,7 @@ struct offsetCount_t { count: u32, } -#[derive(Copy, Clone, Default)] +#[derive(Debug, Copy, Clone, Default)] #[repr(C)] struct DictItem { pos: u32, @@ -391,140 +391,117 @@ unsafe fn ZDICT_analyzePos( solution } -unsafe fn isIncluded( - ip: *const core::ffi::c_char, - into: *const core::ffi::c_char, - length: size_t, -) -> bool { - for u in 0..length { - if *ip.add(u) != *into.add(u) { - return false; - } - } +fn isIncluded(ip: &[u8], into: &[u8], length: size_t) -> bool { + // NOTE: the slices may not actually have `length` elements, + // that is OK if there is an unequal value before that. + let a = ip.iter().take(length); + let b = into.iter().take(length); - true + a.eq(b) } -unsafe fn ZDICT_tryMerge( - table: *mut DictItem, +fn ZDICT_tryMerge( + table: &mut [DictItem], mut elt: DictItem, eltNbToSkip: u32, - buffer: *const core::ffi::c_void, + buffer: &[u8], ) -> u32 { - let tableSize = (*table).pos; + let tableSize = table[0].pos; let eltEnd = (elt.pos).wrapping_add(elt.length); - let buf = buffer as *const core::ffi::c_char; - let mut u: u32 = 0; - u = 1; - while u < tableSize { - if (u != eltNbToSkip) - && (*table.offset(u as isize)).pos > elt.pos - && (*table.offset(u as isize)).pos <= eltEnd - { - let addedLength = ((*table.offset(u as isize)).pos).wrapping_sub(elt.pos); - let fresh2 = &mut (*table.offset(u as isize)).length; - *fresh2 = (*fresh2).wrapping_add(addedLength); - (*table.offset(u as isize)).pos = elt.pos; - let fresh3 = &mut (*table.offset(u as isize)).savings; - *fresh3 = (*fresh3).wrapping_add(elt.savings * addedLength / elt.length); - let fresh4 = &mut (*table.offset(u as isize)).savings; - *fresh4 = (*fresh4).wrapping_add(elt.length / 8); - elt = *table.offset(u as isize); - while u > 1 && (*table.offset(u.wrapping_sub(1) as isize)).savings < elt.savings { - *table.offset(u as isize) = *table.offset(u.wrapping_sub(1) as isize); - u = u.wrapping_sub(1); + let buf = buffer; + + /* tail overlap */ + let mut u = 1usize; + while u < tableSize as usize { + if (u as u32 != eltNbToSkip) && table[u].pos > elt.pos && table[u].pos <= eltEnd { + /* append */ + let addedLength = table[u].pos - elt.pos; + table[u].length += addedLength; + table[u].pos = elt.pos; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + table[u].savings += elt.length / 8; /* rough approx bonus */ + elt = table[u]; + /* sort : improve rank */ + while (u > 1) && (table[u - 1].savings < elt.savings) { + table[u] = table[u - 1]; + u -= 1; } - *table.offset(u as isize) = elt; - return u; + table[u] = elt; + return u as u32; } u = u.wrapping_add(1); } - u = 1; - while u < tableSize { - if u != eltNbToSkip { - if ((*table.offset(u as isize)).pos).wrapping_add((*table.offset(u as isize)).length) - >= elt.pos - && (*table.offset(u as isize)).pos < elt.pos - { - let addedLength_0 = eltEnd as core::ffi::c_int - - ((*table.offset(u as isize)).pos) - .wrapping_add((*table.offset(u as isize)).length) - as core::ffi::c_int; - let fresh5 = &mut (*table.offset(u as isize)).savings; - *fresh5 = (*fresh5).wrapping_add(elt.length / 8); - if addedLength_0 > 0 { - let fresh6 = &mut (*table.offset(u as isize)).length; - *fresh6 = (*fresh6 as core::ffi::c_uint) - .wrapping_add(addedLength_0 as core::ffi::c_uint); - let fresh7 = &mut (*table.offset(u as isize)).savings; - *fresh7 = (*fresh7 as core::ffi::c_uint).wrapping_add( - (elt.savings) - .wrapping_mul(addedLength_0 as core::ffi::c_uint) - .wrapping_div(elt.length), - ); - } - elt = *table.offset(u as isize); - while u > 1 && (*table.offset(u.wrapping_sub(1) as isize)).savings < elt.savings { - *table.offset(u as isize) = *table.offset(u.wrapping_sub(1) as isize); - u = u.wrapping_sub(1); - } - *table.offset(u as isize) = elt; - return u; + + /* front overlap */ + let mut u = 1usize; + while u < tableSize as usize { + if u == eltNbToSkip as usize { + u = u.wrapping_add(1); + continue; + } + + /* overlap, existing < new */ + if (table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos) { + /* append */ + let addedLength = eltEnd as i32 - (table[u].pos + table[u].length) as i32; /* note: can be negative */ + table[u].savings += elt.length / 8; /* rough approx bonus */ + if addedLength > 0 { + /* otherwise, elt fully included into existing */ + table[u].length += addedLength.unsigned_abs(); + /* rough approx */ + table[u].savings += elt.savings * addedLength.unsigned_abs() / elt.length; } - if MEM_read64( - buf.offset((*table.offset(u as isize)).pos as isize) as *const core::ffi::c_void - ) == MEM_read64(buf.offset(elt.pos as isize).add(1) as *const core::ffi::c_void) - && isIncluded( - buf.offset((*table.offset(u as isize)).pos as isize), - buf.offset(elt.pos as isize).add(1), - (*table.offset(u as isize)).length as size_t, - ) - { - let addedLength_1 = Ord::max( - (elt.length).wrapping_sub((*table.offset(u as isize)).length), - 1, - ) as size_t; - (*table.offset(u as isize)).pos = elt.pos; - let fresh8 = &mut (*table.offset(u as isize)).savings; - *fresh8 = (*fresh8).wrapping_add( - (elt.savings as size_t * addedLength_1 / elt.length as size_t) as u32, - ); - (*table.offset(u as isize)).length = Ord::min( - elt.length, - ((*table.offset(u as isize)).length).wrapping_add(1), - ); - return u; + /* sort : improve rank */ + elt = table[u]; + while (u > 1) && (table[u - 1].savings < elt.savings) { + table[u] = table[u - 1]; + u -= 1; } + table[u] = elt; + return u as u32; } + + if buf[table[u].pos as usize..][..8] == buf[elt.pos as usize + 1..][..8] { + if isIncluded( + &buf[table[u].pos as usize..], + &buf[elt.pos as usize + 1..], + table[u].length as usize, + ) { + let addedLength = Ord::max(1, elt.length.checked_sub(table[u].length).unwrap_or(1)); + table[u].pos = elt.pos; + table[u].savings += elt.savings * addedLength / elt.length; + table[u].length = Ord::min(elt.length, table[u].length + 1); + return u as u32; + } + } + u = u.wrapping_add(1); } + 0 } -unsafe fn ZDICT_removeDictItem(table: *mut DictItem, id: u32) { +fn ZDICT_removeDictItem(table: &mut [DictItem], id: u32) { debug_assert_ne!(id, 0); if id == 0 { return; // protection, should never happen } - let max = (*table).pos as isize; // convention: table[0].pos stores the number of elements - for u in id as isize..max.wrapping_sub(1) { - *table.offset(u) = *table.offset(u.wrapping_add(1)); + let max = table[0].pos as usize; // convention: table[0].pos stores the number of elements + for u in id as usize..max.wrapping_sub(1) { + table[u] = table[u + 1]; } - (*table).pos = ((*table).pos).wrapping_sub(1); + table[0].pos -= 1; } -unsafe fn ZDICT_insertDictItem( - table: *mut DictItem, - maxSize: u32, - elt: DictItem, - buffer: *const core::ffi::c_void, -) { +fn ZDICT_insertDictItem(table: &mut [DictItem], elt: DictItem, buffer: &[u8]) { + let maxSize = table.len() as u32; + // merge if possible let mut mergeId = ZDICT_tryMerge(table, elt, 0, buffer); if mergeId != 0 { let mut newMerge = 1; while newMerge != 0 { - newMerge = ZDICT_tryMerge(table, *table.offset(mergeId as isize), mergeId, buffer); + newMerge = ZDICT_tryMerge(table, table[mergeId as usize], mergeId, buffer); if newMerge != 0 { ZDICT_removeDictItem(table, mergeId); } @@ -535,33 +512,32 @@ unsafe fn ZDICT_insertDictItem( // insert let mut current: u32 = 0; - let mut nextElt = (*table).pos; + let mut nextElt = table[0].pos; if nextElt >= maxSize { nextElt = maxSize.wrapping_sub(1); } current = nextElt.wrapping_sub(1); - while (*table.offset(current as isize)).savings < elt.savings { - *table.offset(current.wrapping_add(1) as isize) = *table.offset(current as isize); + while (table[current as usize]).savings < elt.savings { + table[current.wrapping_add(1) as usize] = table[current as usize]; current = current.wrapping_sub(1); } - *table.offset(current.wrapping_add(1) as isize) = elt; - (*table).pos = nextElt.wrapping_add(1); + table[current as usize + 1] = elt; + table[0].pos = nextElt.wrapping_add(1); } -unsafe fn ZDICT_dictSize(dictList: *const DictItem) -> u32 { +unsafe fn ZDICT_dictSize(dictList: &[DictItem]) -> u32 { let mut u: u32 = 0; let mut dictSize = 0u32; u = 1; - while u < (*dictList).pos { - dictSize = dictSize.wrapping_add((*dictList.offset(u as isize)).length); + while u < dictList[0].pos { + dictSize = dictSize.wrapping_add((dictList[u as usize]).length); u = u.wrapping_add(1); } dictSize } unsafe fn ZDICT_trainBuffer_legacy( - dictList: *mut DictItem, - dictListSize: u32, + dictList: &mut [DictItem], buffer: &[u8], mut bufferSize: size_t, fileSizes: *const size_t, @@ -654,7 +630,7 @@ unsafe fn ZDICT_trainBuffer_legacy( continue; } - ZDICT_insertDictItem(dictList, dictListSize, solution, buffer.as_ptr().cast()); + ZDICT_insertDictItem(dictList, solution, buffer); cursor += solution.length as usize; if notificationLevel >= 2 && displayClock.elapsed() > refresh_rate { @@ -1311,8 +1287,7 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( ) -> size_t { let nbSamples = samplesSizes.len() as u32; let dictListSize = Ord::max(Ord::max(10000, nbSamples), (maxDictSize / 16) as u32); - let dictList = malloc((dictListSize as size_t).wrapping_mul(::core::mem::size_of::())) - as *mut DictItem; + let mut dictList = vec![DictItem::default(); dictListSize as size_t]; let selectivity = if params.selectivityLevel == 0 { g_selectivity_default } else { @@ -1328,29 +1303,23 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( let notificationLevel = params.zParams.notificationLevel; // checks - if dictList.is_null() { - return Error::memory_allocation.to_error_code(); - } if maxDictSize < ZDICT_DICTSIZE_MIN { // requested dictionary size is too small - free(dictList as *mut core::ffi::c_void); return Error::dstSize_tooSmall.to_error_code(); } if samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE { // not enough source to create dictionary - free(dictList as *mut core::ffi::c_void); return Error::dictionaryCreation_failed.to_error_code(); } - dictList.as_mut().unwrap().init(); + dictList[0].init(); // The samples must be followed by the noise band. debug_assert!(samples.len() >= samplesBuffSize + NOISELENGTH); // build dictionary ZDICT_trainBuffer_legacy( - dictList, - dictListSize, + &mut dictList, samples, samplesBuffSize, samplesSizes.as_ptr(), @@ -1361,24 +1330,23 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( // display best matches if params.zParams.notificationLevel >= 3 { - let nb = Ord::min(25, (*dictList).pos); - let dictContentSize = ZDICT_dictSize(dictList); + let nb = Ord::min(25, dictList[0].pos); + let dictContentSize = ZDICT_dictSize(&dictList); eprintln!( "\n {} segments found, of total size {} ", - ((*dictList).pos).wrapping_sub(1), + (dictList[0].pos).wrapping_sub(1), dictContentSize, ); eprintln!("list {} best segments ", nb.wrapping_sub(1)); for u in 1..nb { - let pos = (*dictList.offset(u as isize)).pos; - let length = (*dictList.offset(u as isize)).length; + let pos = dictList[u as usize].pos; + let length = dictList[u as usize].length; let printedLength = Ord::min(40, length); debug_assert!((pos + length) as size_t <= samplesBuffSize); if pos as size_t > samplesBuffSize || pos.wrapping_add(length) as size_t > samplesBuffSize { - free(dictList as *mut core::ffi::c_void); return Error::GENERIC.to_error_code(); // should never happen } eprint!( @@ -1386,7 +1354,7 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( u, length, pos, - (*dictList.offset(u as isize)).savings, + (dictList[u as usize]).savings, ); ZDICT_printHex(&samples[..printedLength as usize]); eprintln!("|"); @@ -1394,11 +1362,10 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( } // create dictionary - let mut dictContentSize_0 = ZDICT_dictSize(dictList); + let mut dictContentSize_0 = ZDICT_dictSize(&dictList); #[expect(deprecated)] if dictContentSize_0 < ZDICT_CONTENTSIZE_MIN { // dictionary content too small - free(dictList as *mut core::ffi::c_void); return Error::dictionaryCreation_failed.to_error_code(); } @@ -1445,50 +1412,47 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( } // limit dictionary size - let max = (*dictList).pos; // convention: dictList[0].pos contains the number of useful elements + let max = dictList[0].pos; // convention: dictList[0].pos contains the number of useful elements let mut currentSize = 0u32; let mut n: u32 = 1; while n < max { - currentSize = currentSize.wrapping_add((*dictList.offset(n as isize)).length); + currentSize = currentSize.wrapping_add((dictList[n as usize]).length); if currentSize as size_t > targetDictSize { - currentSize = currentSize.wrapping_sub((*dictList.offset(n as isize)).length); + currentSize = currentSize.wrapping_sub((dictList[n as usize]).length); break; } else { n = n.wrapping_add(1); } } - (*dictList).pos = n; + dictList[0].pos = n; dictContentSize_0 = currentSize; // build dictionary content let mut ptr = (dictBuffer as *mut u8).add(maxDictSize); - for u in 1..(*dictList).pos { - let l = (*dictList.offset(u as isize)).length; + for u in 1..dictList[0].pos { + let l = (dictList[u as usize]).length; ptr = ptr.offset(-(l as isize)); debug_assert!(ptr >= dictBuffer as *mut u8); if ptr < dictBuffer as *mut u8 { - free(dictList as *mut core::ffi::c_void); return Error::GENERIC.to_error_code(); // should not happen } memcpy( ptr as *mut core::ffi::c_void, - samples[(*dictList.offset(u as isize)).pos as usize..].as_ptr() - as *const core::ffi::c_void, + samples[(dictList[u as usize]).pos as usize..].as_ptr() as *const core::ffi::c_void, l as size_t, ); } - let dictSize = ZDICT_addEntropyTablesFromBuffer_advanced( + + + ZDICT_addEntropyTablesFromBuffer_advanced( dictBuffer, dictContentSize_0 as size_t, maxDictSize, samples, samplesSizes, params.zParams, - ); - - free(dictList as *mut core::ffi::c_void); - dictSize + ) } /// Train a dictionary from an array of samples. diff --git a/test-libzstd-rs-sys/src/dict_builder.rs b/test-libzstd-rs-sys/src/dict_builder.rs index c9c287d7..fabf79fd 100644 --- a/test-libzstd-rs-sys/src/dict_builder.rs +++ b/test-libzstd-rs-sys/src/dict_builder.rs @@ -278,6 +278,288 @@ fn test_train_from_buffer_legacy() { }); } +#[test] +#[cfg(not(target_family = "wasm"))] +#[cfg_attr(miri, ignore = "slow")] +fn test_train_from_buffer_legacy_try_merge_1() { + let _ = assert_eq_rs_c!({ + let sample_data: Vec = vec![ + 255, 255, 135, 167, 135, 135, 61, 0, 135, 255, 255, 255, 255, 61, 0, 135, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 135, 167, 135, 126, 135, 135, 135, 255, 255, + 135, 135, 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 255, + 255, 255, 255, 255, 135, 135, 255, 255, 135, 135, 135, 255, 255, 255, 255, 135, 167, + 135, 143, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, + 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, 135, 167, 135, 135, 61, 0, 135, 255, + 255, 255, 255, 61, 0, 135, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 135, 167, + 135, 126, 135, 135, 135, 255, 255, 135, 135, 255, 255, 255, 135, 167, 135, 143, 135, + 135, 135, 255, 255, 135, 135, 255, 255, 255, 255, 255, 135, 135, 255, 255, 135, 135, + 135, 255, 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 135, + 135, 135, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, + 135, 167, 135, 135, 61, 0, 135, 255, 255, 255, 255, 61, 0, 135, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 135, 167, 135, 126, 135, 135, 135, 255, 255, 135, 135, + 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 255, 255, 255, + 255, 255, 135, 135, 255, 255, 135, 135, 135, 255, 255, 255, 255, 135, 167, 135, 143, + 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, 135, 135, + 135, 135, 135, 135, 135, 135, 255, 255, 135, 167, 135, 135, 61, 0, 135, 255, 255, 255, + 255, 61, 0, 135, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 135, 167, 135, 126, + 135, 135, 135, 255, 255, 135, 135, 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, + 255, 255, 135, 135, 255, 255, 255, 255, 255, 135, 135, 255, 255, 135, 135, 135, 255, + 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, + 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, 135, 135, + 135, 255, 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 135, + 135, 135, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 135, 167, 135, 126, 135, 135, 135, 255, 255, + 135, 135, 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 255, + 255, 255, 255, 255, 135, 135, 255, 255, 135, 135, 135, 255, 255, 255, 255, 135, 167, + 135, 143, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, + 135, 135, 135, 135, 135, 135, 135, 135, 135, 167, 135, 135, 61, 0, 135, 255, 255, 255, + 255, 61, 0, 135, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 135, 167, 135, 126, + 135, 135, 135, 255, 255, 135, 135, 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, + 255, 255, 135, 135, 255, 255, 255, 255, 255, 135, 135, 255, 255, 135, 135, 135, 255, + 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, + 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, 135, 167, + 135, 135, 61, 0, 135, 255, 255, 255, 255, 61, 0, 135, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 135, 167, 135, 126, 135, 135, 135, 255, 255, 135, 135, 255, 255, + 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 255, 255, 255, 255, 255, + 135, 135, 255, 255, 135, 135, 135, 255, 255, 255, 255, 135, 167, 135, 143, 135, 135, + 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, 135, 135, 135, 135, + 135, 135, 135, 135, 255, 255, 135, 167, 135, 135, 61, 0, 135, 255, 255, 255, 255, 61, + 0, 135, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 135, 167, 135, 126, 135, 135, + 135, 255, 255, 135, 135, 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, + 135, 135, 255, 255, 255, 255, 255, 135, 135, 255, 255, 135, 135, 135, 255, 255, 255, + 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, + 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, 255, 135, 167, 135, + 143, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, 135, + 135, 135, 135, 135, 135, 135, 135, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, + 135, 135, 135, 135, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, + 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, + 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, 135, 167, 135, 135, 61, 0, + 135, 255, 255, 255, 255, 61, 0, 135, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 135, 167, 135, 126, 135, 135, 135, 255, 255, 135, 135, 255, 255, 255, 135, 167, 135, + 143, 135, 135, 135, 255, 255, 135, 135, 255, 255, 255, 255, 255, 135, 135, 255, 255, + 135, 135, 135, 255, 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, + 135, 135, 135, 135, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, + 255, 255, 135, 167, 135, 135, 61, 0, 135, 255, 255, 255, 255, 61, 0, 135, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 135, 167, 135, 126, 135, 135, 135, 255, 255, + 135, 135, 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 255, + 255, 255, 255, 255, 135, 135, 255, 255, 135, 135, 135, 255, 255, 255, 255, 135, 167, + 135, 143, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, + 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, + 135, 135, 135, 255, 255, 135, 167, 135, 135, 61, 0, 135, 255, 255, 255, 255, 61, 0, + 135, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 135, 167, 135, 126, 135, 135, + 135, 255, 255, 135, 135, 255, 255, 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, + 135, 135, 255, 255, 255, 255, 255, 135, 135, 255, 255, 135, 135, 135, 255, 255, 255, + 255, 135, 167, 135, 143, 135, 135, 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, + 135, 255, 255, 135, 135, 135, 135, 135, 135, 135, 135, + ]; + + let sample_sizes = vec![ + 94, 94, 94, 94, 36, 78, 92, 94, 94, 30, 27, 27, 94, 94, 12, 94, + ]; + + let params = ZDICT_legacy_params_t { + zParams: ZDICT_params_t { + compressionLevel: 0, + notificationLevel: 0, + dictID: 0, + }, + selectivityLevel: 0, + }; + + let dict_capacity = 1024; + let mut dict_buffer = vec![0u8; dict_capacity]; + + let dict_size = ZDICT_trainFromBuffer_legacy( + dict_buffer.as_mut_ptr() as *mut c_void, + dict_buffer.len(), + sample_data.as_ptr() as *const c_void, + sample_sizes.as_ptr(), + sample_sizes.len() as u32, + params, + ); + + if ZDICT_isError(dict_size) != 0 { + Err(CStr::from_ptr(ZDICT_getErrorName(dict_size)).to_str()) + } else { + dict_buffer.truncate(dict_size); + + println!("Dictionary size: {}", dict_size); + + Ok(dict_buffer) + } + }); +} + +#[test] +#[cfg(not(target_family = "wasm"))] +#[cfg_attr(miri, ignore = "slow")] +fn test_train_from_buffer_legacy_try_merge_2() { + let _ = assert_eq_rs_c!({ + let mut sample_data: Vec = vec![ + 31, 31, 255, 31, 31, 31, 19, 19, 19, 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, + 255, 31, 31, 31, 19, 19, 19, 19, 19, 19, 19, 19, 255, 255, 255, 255, 255, 255, 31, 31, + 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, 255, 37, 31, 19, 19, 19, 69, 19, 3, + 19, 19, 255, 255, 255, 255, 255, 19, 19, 19, 19, 19, 31, 31, 255, 31, 31, 31, 19, 19, + 19, 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, 255, 31, 31, 31, 19, 19, 19, 19, + 19, 19, 19, 19, 255, 255, 255, 255, 255, 255, 31, 31, 19, 255, 255, 255, 19, 19, 19, + 19, 255, 19, 255, 255, 37, 31, 19, 19, 19, 69, 19, 3, 19, 19, 255, 255, 255, 255, 255, + 19, 19, 19, 19, 19, 31, 31, 255, 31, 31, 31, 19, 19, 19, 19, 255, 255, 255, 19, 19, 19, + 19, 255, 19, 255, 255, 31, 31, 31, 19, 19, 19, 19, 19, 19, 19, 19, 255, 255, 255, 255, + 255, 255, 31, 31, 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, 255, 37, 31, 19, 19, + 19, 69, 19, 3, 19, 19, 255, 255, 255, 255, 255, 19, 19, 19, 19, 19, 31, 255, 31, 31, + 31, 19, 19, 19, 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, 255, 31, 31, 31, 19, + 19, 19, 19, 19, 19, 19, 19, 255, 255, 255, 255, 255, 255, 31, 31, 19, 255, 255, 255, + 19, 19, 19, 19, 255, 19, 255, 255, 37, 31, 19, 19, 19, 69, 19, 3, 19, 19, 255, 255, + 255, 255, 255, 19, 19, 19, 19, 19, 31, 31, 255, 31, 31, 31, 19, 19, 19, 19, 255, 255, + 255, 19, 19, 19, 19, 255, 19, 255, 255, 31, 31, 31, 19, 19, 19, 19, 19, 19, 19, 19, + 255, 255, 255, 255, 255, 255, 31, 31, 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, + 255, 37, 31, 19, 19, 19, 69, 19, 3, 19, 19, 255, 255, 255, 255, 255, 19, 19, 19, 19, + 19, 31, 31, 255, 31, 31, 31, 19, 19, 19, 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, + 255, 255, 31, 31, 31, 19, 19, 19, 19, 19, 19, 19, 19, 255, 255, 255, 255, 255, 255, 31, + 31, 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, 255, 37, 31, 19, 19, 19, 69, 19, + 3, 19, 19, 255, 255, 255, 255, 255, 19, 19, 19, 19, 19, 31, 31, 255, 31, 31, 31, 19, + 19, 19, 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, 255, 31, 31, 31, 19, 19, 19, + 19, 19, 19, 19, 19, 255, 255, 255, 255, 255, 255, 31, 31, 19, 255, 255, 255, 19, 19, + 19, 19, 255, 19, 255, 255, 37, 31, 19, 19, 19, 69, 19, 3, 19, 19, 255, 255, 255, 255, + 255, 19, 19, 19, 19, 19, 255, 31, 31, 31, 19, 19, 19, 19, 19, 19, 19, 19, 255, 255, + 255, 255, 255, 255, 31, 31, 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, 255, 37, + 31, 19, 19, 19, 69, 19, 3, 19, 19, 255, 255, 255, 255, 255, 19, 19, 19, 19, 19, 31, 31, + 255, 31, 31, 31, 19, 19, 19, 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, 255, 31, + 31, 31, 19, 19, 19, 19, 19, 19, 19, 19, 255, 255, 255, 255, 255, 255, 31, 31, 19, 255, + 255, 255, 19, 19, 19, 19, 255, 19, 255, 255, 37, 31, 19, 19, 19, 69, 19, 3, 19, 19, + 255, 255, 255, 255, 255, 19, 19, 19, 19, 19, 31, 31, 19, 19, 19, 19, 255, 255, 255, 19, + 19, 19, 19, 255, 19, 255, 255, 31, 31, 31, 19, 19, 19, 19, 19, 19, 19, 19, 255, 255, + 255, 255, 255, 255, 31, 31, 19, 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, 255, 37, + 31, 19, 19, 19, 69, 19, 3, 19, 19, 255, 255, 255, 255, 255, 19, 19, 19, 19, 19, 31, 19, + 255, 255, 255, 19, 19, 19, 19, 255, 19, 255, 255, 37, 31, 19, 19, 19, 69, 19, 3, 19, + 19, 255, 255, 255, 255, 255, 19, 19, 19, 19, 19, + ]; + + let sample_sizes = vec![72, 72, 72, 71, 72, 72, 72, 52, 72, 68, 33]; + + sample_data.resize(sample_sizes.iter().sum::(), 0u8); + + let params = ZDICT_legacy_params_t { + zParams: ZDICT_params_t { + compressionLevel: 0, + notificationLevel: 0, + dictID: 0, + }, + selectivityLevel: 0, + }; + + let dict_capacity = 1024; + let mut dict_buffer = vec![0u8; dict_capacity]; + + let dict_size = ZDICT_trainFromBuffer_legacy( + dict_buffer.as_mut_ptr() as *mut c_void, + dict_buffer.len(), + sample_data.as_ptr() as *const c_void, + sample_sizes.as_ptr(), + sample_sizes.len() as u32, + params, + ); + + if ZDICT_isError(dict_size) != 0 { + Err(CStr::from_ptr(ZDICT_getErrorName(dict_size)).to_str()) + } else { + dict_buffer.truncate(dict_size); + + println!("Dictionary size: {}", dict_size); + + Ok(dict_buffer) + } + }); +} + +#[test] +#[cfg(not(target_family = "wasm"))] +#[cfg_attr(miri, ignore = "slow")] +fn test_train_from_buffer_legacy_try_merge_3() { + let _ = assert_eq_rs_c!({ + let mut sample_data: Vec = vec![ + 31, 31, 19, 255, 255, 19, 41, 43, 255, 255, 255, 255, 255, 255, 255, 19, 255, 255, 46, + 255, 255, 255, 255, 31, 255, 255, 19, 255, 19, 255, 19, 255, 127, 19, 255, 19, 255, + 255, 31, 255, 255, 19, 19, 255, 19, 255, 19, 247, 19, 255, 19, 255, 19, 19, 255, 19, + 255, 19, 255, 19, 255, 14, 255, 19, 19, 255, 19, 255, 19, 255, 19, 239, 19, 255, 19, + 255, 19, 255, 31, 31, 19, 255, 255, 19, 41, 43, 255, 255, 255, 255, 255, 255, 255, 19, + 255, 255, 46, 255, 255, 255, 255, 31, 255, 255, 19, 255, 19, 255, 19, 255, 127, 19, + 255, 19, 255, 255, 31, 255, 255, 19, 19, 255, 19, 255, 19, 247, 19, 255, 19, 255, 19, + 19, 255, 19, 255, 19, 255, 19, 255, 14, 255, 19, 19, 255, 19, 255, 19, 255, 19, 239, + 19, 255, 19, 255, 19, 255, 31, 31, 19, 255, 255, 19, 41, 43, 255, 255, 255, 255, 255, + 255, 255, 19, 255, 255, 46, 255, 255, 255, 255, 31, 255, 255, 19, 255, 19, 255, 19, + 255, 127, 19, 255, 19, 255, 255, 31, 255, 255, 19, 19, 255, 19, 255, 19, 247, 19, 255, + 19, 255, 19, 19, 255, 19, 255, 19, 255, 19, 255, 14, 255, 19, 19, 255, 19, 255, 19, + 255, 19, 239, 19, 255, 19, 255, 19, 255, 31, 19, 255, 255, 19, 41, 43, 255, 255, 255, + 255, 255, 255, 255, 19, 255, 255, 46, 255, 255, 255, 255, 31, 255, 255, 19, 255, 19, + 255, 19, 255, 127, 19, 255, 19, 255, 255, 31, 255, 255, 19, 19, 255, 19, 255, 19, 247, + 19, 255, 19, 255, 19, 19, 255, 19, 255, 19, 255, 19, 255, 14, 255, 19, 19, 255, 19, + 255, 19, 255, 19, 239, 19, 255, 19, 255, 19, 255, 31, 31, 19, 255, 255, 19, 41, 43, + 255, 255, 255, 255, 255, 255, 255, 19, 255, 255, 46, 255, 255, 255, 255, 31, 255, 255, + 19, 255, 19, 255, 19, 255, 127, 19, 255, 19, 255, 255, 31, 255, 255, 19, 19, 255, 19, + 255, 19, 247, 19, 255, 19, 255, 19, 19, 255, 19, 255, 19, 255, 19, 255, 14, 255, 19, + 19, 255, 19, 255, 19, 255, 19, 239, 19, 255, 19, 255, 19, 255, 31, 19, 255, 255, 19, + 41, 43, 255, 255, 255, 255, 255, 255, 255, 19, 255, 255, 46, 255, 255, 255, 255, 31, + 255, 255, 19, 255, 19, 255, 19, 255, 127, 19, 255, 19, 255, 255, 31, 255, 255, 19, 19, + 255, 19, 255, 19, 247, 19, 255, 19, 255, 19, 19, 255, 19, 255, 19, 255, 19, 255, 14, + 255, 19, 19, 255, 19, 255, 19, 255, 19, 239, 19, 255, 19, 255, 19, 255, 19, 19, 255, + 19, 255, 19, 255, 19, 255, 14, 255, 19, 19, 255, 19, 255, 19, 255, 19, 239, 19, 255, + 19, 255, 19, 255, 31, 31, 19, 255, 255, 19, 41, 43, 255, 255, 255, 255, 255, 255, 255, + 19, 255, 255, 46, 255, 255, 255, 255, 31, 255, 255, 19, 255, 19, 255, 19, 255, 127, 19, + 255, 19, 255, 255, 31, 255, 255, 19, 19, 255, 19, 255, 19, 247, 19, 255, 19, 255, 19, + 19, 255, 19, 255, 19, 255, 19, 255, 14, 255, 19, 19, 255, 19, 255, 19, 255, 19, 239, + 19, 255, 19, 255, 19, 255, 31, 31, 19, 255, 255, 19, 41, 43, 255, 255, 255, 255, 255, + 255, 255, 19, 255, 255, 46, 255, 255, 255, 255, 31, 255, 255, 19, 255, 19, 255, 19, + 255, 127, 19, 255, 19, 255, 255, 31, 255, 255, 19, 19, 255, 19, 255, 19, 247, 19, 255, + 19, 255, 19, 19, 255, 19, 255, 19, 255, 19, 255, 14, 255, 19, 19, 255, 19, 255, 19, + 255, 19, 239, 19, 255, 19, 255, 19, 255, 31, 31, 19, 255, 255, 19, 41, 43, 255, 255, + 255, 255, 255, 255, 255, 19, 255, 255, 46, 255, 255, 255, 255, 31, 255, 255, 19, 255, + 19, 255, 19, 255, 127, 19, 255, 19, 255, 255, 31, 255, 255, 19, 19, 255, 19, 255, 19, + 247, 19, 255, 19, 255, 19, 19, 255, 19, 255, 19, 255, 19, 255, 14, 255, 19, 19, 255, + 19, 255, 19, 255, 19, 239, 19, 255, 19, 255, 19, 255, 31, 31, 19, 255, 255, 19, 41, 43, + 255, 255, 255, 255, 255, 255, 255, 19, 255, 255, 46, 255, 255, 255, 255, 31, 255, 255, + 19, 255, 19, 255, 19, 255, 127, 19, 255, 19, 255, 255, 31, 255, 255, 19, 19, 255, 19, + 255, 19, 247, 19, 255, 19, 255, 19, 19, 255, 19, 255, 19, 255, 19, 255, 14, 255, 19, + 19, 255, 19, 255, 19, 255, 19, 239, 19, 255, 19, 255, 19, 255, + ]; + + let sample_sizes = vec![78, 78, 78, 77, 78, 77, 26, 78, 78, 78, 78]; + + sample_data.resize(sample_sizes.iter().sum::(), 0u8); + + let params = ZDICT_legacy_params_t { + zParams: ZDICT_params_t { + compressionLevel: 0, + notificationLevel: 0, + dictID: 0, + }, + selectivityLevel: 0, + }; + + let dict_capacity = 1024; + let mut dict_buffer = vec![0u8; dict_capacity]; + + let dict_size = ZDICT_trainFromBuffer_legacy( + dict_buffer.as_mut_ptr() as *mut c_void, + dict_buffer.len(), + sample_data.as_ptr() as *const c_void, + sample_sizes.as_ptr(), + sample_sizes.len() as u32, + params, + ); + + if ZDICT_isError(dict_size) != 0 { + Err(CStr::from_ptr(ZDICT_getErrorName(dict_size)).to_str()) + } else { + dict_buffer.truncate(dict_size); + + println!("Dictionary size: {}", dict_size); + + Ok(dict_buffer) + } + }); +} + #[test] #[cfg(not(target_family = "wasm"))] fn test_optimize_train_from_buffer_cover_single_threaded() {