diff --git a/c2rust-lib.rs b/c2rust-lib.rs index fc1f055f..419ceece 100644 --- a/c2rust-lib.rs +++ b/c2rust-lib.rs @@ -144,8 +144,9 @@ pub use crate::lib::dictBuilder::cover::{ pub use crate::lib::dictBuilder::fastcover::{ ZDICT_optimizeTrainFromBuffer_fastCover, ZDICT_trainFromBuffer_fastCover, }; +pub use crate::lib::dictBuilder::zdict::ZDICT_trainFromBuffer_legacy; pub use crate::lib::zdict::{ - experimental::{ZDICT_cover_params_t, ZDICT_fastCover_params_t}, + experimental::{ZDICT_cover_params_t, ZDICT_fastCover_params_t, ZDICT_legacy_params_t}, ZDICT_getDictID, ZDICT_getErrorName, ZDICT_isError, ZDICT_params_t, ZDICT_trainFromBuffer, }; diff --git a/lib/common/huf.rs b/lib/common/huf.rs index 7f4592e1..be2cb867 100644 --- a/lib/common/huf.rs +++ b/lib/common/huf.rs @@ -13,6 +13,9 @@ pub(crate) const HUF_TABLELOG_MAX: usize = 12; pub(crate) const HUF_TABLELOG_DEFAULT: core::ffi::c_int = 11; pub(crate) const HUF_SYMBOLVALUE_MAX: core::ffi::c_int = 255; +pub(crate) const HUF_CTABLE_WORKSPACE_SIZE_U32: usize = + (4 * (HUF_SYMBOLVALUE_MAX as usize + 1)) + 192; + /// Absolute limit of HUF_MAX_TABLELOG. 
Beyond that value, code does not work const HUF_TABLELOG_ABSOLUTEMAX: usize = 12; const _: () = assert!( diff --git a/lib/common/zstd_internal.rs b/lib/common/zstd_internal.rs index 814dfcef..c58320d6 100644 --- a/lib/common/zstd_internal.rs +++ b/lib/common/zstd_internal.rs @@ -24,17 +24,17 @@ pub(crate) const bt_compressed: blockType_e = 2; pub(crate) const MINMATCH: core::ffi::c_int = 3; -pub(crate) const Litbits: core::ffi::c_int = 8; -pub(crate) const LitHufLog: core::ffi::c_int = 11; -pub(crate) const MaxLit: core::ffi::c_int = ((1) << Litbits) - 1; -pub(crate) const MaxML: core::ffi::c_int = 52; -pub(crate) const MaxLL: core::ffi::c_int = 35; -pub(crate) const DefaultMaxOff: core::ffi::c_int = 28; -pub(crate) const MaxOff: core::ffi::c_int = 31; +pub(crate) const Litbits: u32 = 8; +pub(crate) const LitHufLog: u32 = 11; +pub(crate) const MaxLit: u32 = (1 << Litbits) - 1; +pub(crate) const MaxML: u32 = 52; +pub(crate) const MaxLL: u32 = 35; +pub(crate) const DefaultMaxOff: u32 = 28; +pub(crate) const MaxOff: u32 = 31; pub(crate) const MaxSeq: usize = const_max(MaxLL as usize, MaxML as usize); /* Assumption : MaxOff < MaxLL,MaxML */ -pub(crate) const MLFSELog: core::ffi::c_int = 9; -pub(crate) const LLFSELog: core::ffi::c_int = 9; -pub(crate) const OffFSELog: core::ffi::c_int = 8; +pub(crate) const MLFSELog: u32 = 9; +pub(crate) const LLFSELog: u32 = 9; +pub(crate) const OffFSELog: u32 = 8; pub(crate) const MaxFSELog: usize = const_max( const_max(MLFSELog as usize, LLFSELog as usize), OffFSELog as usize, diff --git a/lib/compress/zstd_compress.rs b/lib/compress/zstd_compress.rs index 42bac4ac..a808263e 100644 --- a/lib/compress/zstd_compress.rs +++ b/lib/compress/zstd_compress.rs @@ -4655,7 +4655,7 @@ unsafe fn ZSTD_buildSequencesStatistics( }; stats.lastCountSize = 0; stats.longOffsets = ZSTD_seqToCodes(seqStorePtr); - let mut max = MaxLL as core::ffi::c_uint; + let mut max = MaxLL; let mostFrequent = HIST_countFast_wksp( countWorkspace, &mut max, @@ -4671,7 
+4671,7 @@ unsafe fn ZSTD_buildSequencesStatistics( max, mostFrequent, nbSeq, - LLFSELog as core::ffi::c_uint, + LLFSELog, ((*prevEntropy).litlengthCTable).as_ptr(), LL_defaultNorm.as_ptr(), LL_defaultNormLog, @@ -4682,7 +4682,7 @@ unsafe fn ZSTD_buildSequencesStatistics( op as *mut core::ffi::c_void, oend.offset_from_unsigned(op), CTable_LitLength, - LLFSELog as u32, + LLFSELog, stats.LLtype as SymbolEncodingType_e, countWorkspace, max, @@ -4690,7 +4690,7 @@ unsafe fn ZSTD_buildSequencesStatistics( nbSeq, LL_defaultNorm.as_ptr(), LL_defaultNormLog, - MaxLL as u32, + MaxLL, ((*prevEntropy).litlengthCTable).as_ptr(), ::core::mem::size_of::<[FSE_CTable; 329]>(), entropyWorkspace, @@ -4704,7 +4704,7 @@ unsafe fn ZSTD_buildSequencesStatistics( stats.lastCountSize = countSize; } op = op.add(countSize); - let mut max_0 = MaxOff as core::ffi::c_uint; + let mut max_0 = MaxOff; let mostFrequent_0 = HIST_countFast_wksp( countWorkspace, &mut max_0, @@ -4713,7 +4713,7 @@ unsafe fn ZSTD_buildSequencesStatistics( entropyWorkspace, entropyWkspSize, ); - let defaultPolicy = (if max_0 <= DefaultMaxOff as core::ffi::c_uint { + let defaultPolicy = (if max_0 <= DefaultMaxOff { ZSTD_defaultAllowed as core::ffi::c_int } else { ZSTD_defaultDisallowed as core::ffi::c_int @@ -4725,7 +4725,7 @@ unsafe fn ZSTD_buildSequencesStatistics( max_0, mostFrequent_0, nbSeq, - OffFSELog as core::ffi::c_uint, + OffFSELog, ((*prevEntropy).offcodeCTable).as_ptr(), OF_defaultNorm.as_ptr(), OF_defaultNormLog, @@ -4736,7 +4736,7 @@ unsafe fn ZSTD_buildSequencesStatistics( op as *mut core::ffi::c_void, oend.offset_from_unsigned(op), CTable_OffsetBits, - OffFSELog as u32, + OffFSELog, stats.Offtype as SymbolEncodingType_e, countWorkspace, max_0, @@ -4744,7 +4744,7 @@ unsafe fn ZSTD_buildSequencesStatistics( nbSeq, OF_defaultNorm.as_ptr(), OF_defaultNormLog, - DefaultMaxOff as u32, + DefaultMaxOff, ((*prevEntropy).offcodeCTable).as_ptr(), ::core::mem::size_of::<[FSE_CTable; 193]>(), entropyWorkspace, @@ 
-4758,7 +4758,7 @@ unsafe fn ZSTD_buildSequencesStatistics( stats.lastCountSize = countSize_0; } op = op.add(countSize_0); - let mut max_1 = MaxML as core::ffi::c_uint; + let mut max_1 = MaxML; let mostFrequent_1 = HIST_countFast_wksp( countWorkspace, &mut max_1, @@ -4774,7 +4774,7 @@ unsafe fn ZSTD_buildSequencesStatistics( max_1, mostFrequent_1, nbSeq, - MLFSELog as core::ffi::c_uint, + MLFSELog, ((*prevEntropy).matchlengthCTable).as_ptr(), ML_defaultNorm.as_ptr(), ML_defaultNormLog, @@ -4785,7 +4785,7 @@ unsafe fn ZSTD_buildSequencesStatistics( op as *mut core::ffi::c_void, oend.offset_from_unsigned(op), CTable_MatchLength, - MLFSELog as u32, + MLFSELog, stats.MLtype as SymbolEncodingType_e, countWorkspace, max_1, @@ -4793,7 +4793,7 @@ unsafe fn ZSTD_buildSequencesStatistics( nbSeq, ML_defaultNorm.as_ptr(), ML_defaultNormLog, - MaxML as u32, + MaxML, ((*prevEntropy).matchlengthCTable).as_ptr(), ::core::mem::size_of::<[FSE_CTable; 363]>(), entropyWorkspace, @@ -5659,7 +5659,7 @@ unsafe fn ZSTD_buildBlockEntropyStats_literals( let nodeWksp = countWkspStart.add(countWkspSize); let nodeWkspSize = wkspEnd.offset_from_unsigned(nodeWksp); let mut maxSymbolValue = HUF_SYMBOLVALUE_MAX as core::ffi::c_uint; - let mut huffLog = LitHufLog as core::ffi::c_uint; + let mut huffLog = LitHufLog; let mut repeat = (*prevHuf).repeatMode; libc::memcpy( nextHuf as *mut core::ffi::c_void, @@ -6020,12 +6020,12 @@ unsafe fn ZSTD_estimateBlockSize_sequences( (*fseMetadata).ofType, ofCodeTable, nbSeq, - MaxOff as core::ffi::c_uint, + MaxOff, ((*fseTables).offcodeCTable).as_ptr(), core::ptr::null(), OF_defaultNorm.as_ptr(), OF_defaultNormLog, - DefaultMaxOff as u32, + DefaultMaxOff, workspace, wkspSize, )); @@ -6033,12 +6033,12 @@ unsafe fn ZSTD_estimateBlockSize_sequences( (*fseMetadata).llType, llCodeTable, nbSeq, - MaxLL as core::ffi::c_uint, + MaxLL, ((*fseTables).litlengthCTable).as_ptr(), LL_bits.as_ptr(), LL_defaultNorm.as_ptr(), LL_defaultNormLog, - MaxLL as u32, + MaxLL, 
workspace, wkspSize, )); @@ -6046,12 +6046,12 @@ unsafe fn ZSTD_estimateBlockSize_sequences( (*fseMetadata).mlType, mlCodeTable, nbSeq, - MaxML as core::ffi::c_uint, + MaxML, ((*fseTables).matchlengthCTable).as_ptr(), ML_bits.as_ptr(), ML_defaultNorm.as_ptr(), ML_defaultNormLog, - MaxML as u32, + MaxML, workspace, wkspSize, )); @@ -7343,7 +7343,7 @@ pub unsafe fn ZSTD_loadCEntropy( dictSize: size_t, ) -> size_t { let mut offcodeNCount: [core::ffi::c_short; 32] = [0; 32]; - let mut offcodeMaxValue = MaxOff as core::ffi::c_uint; + let mut offcodeMaxValue = MaxOff; let mut dictPtr = dict as *const u8; let dictEnd = dictPtr.add(dictSize); dictPtr = dictPtr.add(8); @@ -7390,7 +7390,7 @@ pub unsafe fn ZSTD_loadCEntropy( } dictPtr = dictPtr.add(offcodeHeaderSize); let mut matchlengthNCount: [core::ffi::c_short; 53] = [0; 53]; - let mut matchlengthMaxValue = MaxML as core::ffi::c_uint; + let mut matchlengthMaxValue = MaxML; let mut matchlengthLog: core::ffi::c_uint = 0; let matchlengthHeaderSize = FSE_readNCount( &mut matchlengthNCount, @@ -7415,14 +7415,11 @@ pub unsafe fn ZSTD_loadCEntropy( )) { return Error::dictionary_corrupted.to_error_code(); } - (*bs).entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat( - matchlengthNCount.as_mut_ptr(), - matchlengthMaxValue, - MaxML as core::ffi::c_uint, - ); + (*bs).entropy.fse.matchlength_repeatMode = + ZSTD_dictNCountRepeat(matchlengthNCount.as_mut_ptr(), matchlengthMaxValue, MaxML); dictPtr = dictPtr.add(matchlengthHeaderSize); let mut litlengthNCount: [core::ffi::c_short; 36] = [0; 36]; - let mut litlengthMaxValue = MaxLL as core::ffi::c_uint; + let mut litlengthMaxValue = MaxLL; let mut litlengthLog: core::ffi::c_uint = 0; let litlengthHeaderSize = FSE_readNCount( &mut litlengthNCount, @@ -7447,11 +7444,8 @@ pub unsafe fn ZSTD_loadCEntropy( )) { return Error::dictionary_corrupted.to_error_code(); } - (*bs).entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat( - litlengthNCount.as_mut_ptr(), - litlengthMaxValue, - 
MaxLL as core::ffi::c_uint, - ); + (*bs).entropy.fse.litlength_repeatMode = + ZSTD_dictNCountRepeat(litlengthNCount.as_mut_ptr(), litlengthMaxValue, MaxLL); dictPtr = dictPtr.add(litlengthHeaderSize); if dictPtr.add(12) > dictEnd { return Error::dictionary_corrupted.to_error_code(); @@ -7461,7 +7455,7 @@ pub unsafe fn ZSTD_loadCEntropy( *((*bs).rep).as_mut_ptr().add(2) = MEM_readLE32(dictPtr.add(8) as *const core::ffi::c_void); dictPtr = dictPtr.add(12); let dictContentSize = dictEnd.offset_from_unsigned(dictPtr); - let mut offcodeMax = MaxOff as u32; + let mut offcodeMax = MaxOff; if dictContentSize <= (-(1 as core::ffi::c_int) as u32) .wrapping_sub((128 as core::ffi::c_int * ((1 as core::ffi::c_int) << 10)) as u32) diff --git a/lib/compress/zstd_compress_literals.rs b/lib/compress/zstd_compress_literals.rs index 1f6697b4..f8f49cdf 100644 --- a/lib/compress/zstd_compress_literals.rs +++ b/lib/compress/zstd_compress_literals.rs @@ -251,7 +251,7 @@ pub unsafe fn ZSTD_compressLiterals( src, srcSize, HUF_SYMBOLVALUE_MAX as core::ffi::c_uint, - LitHufLog as core::ffi::c_uint, + LitHufLog, entropyWorkspace, entropyWorkspaceSize, ((*nextHuf).CTable).as_mut_ptr(), diff --git a/lib/compress/zstd_compress_sequences.rs b/lib/compress/zstd_compress_sequences.rs index b9ba6238..67ec8ce9 100644 --- a/lib/compress/zstd_compress_sequences.rs +++ b/lib/compress/zstd_compress_sequences.rs @@ -473,7 +473,7 @@ unsafe fn ZSTD_encodeSequences_body( ); if MEM_32bits() || ofBits_0.wrapping_add(mlBits).wrapping_add(llBits) - >= (64 - 7 - (LLFSELog + MLFSELog + OffFSELog)) as u32 + >= 64 - 7 - (LLFSELog + MLFSELog + OffFSELog) { BIT_flushBits(&mut blockStream); } diff --git a/lib/compress/zstd_compress_superblock.rs b/lib/compress/zstd_compress_superblock.rs index 4768e2a1..8dd81bdb 100644 --- a/lib/compress/zstd_compress_superblock.rs +++ b/lib/compress/zstd_compress_superblock.rs @@ -736,39 +736,39 @@ unsafe fn ZSTD_estimateSubBlockSize_sequences( cSeqSizeEstimate = 
cSeqSizeEstimate.wrapping_add(ZSTD_estimateSubBlockSize_symbolType( (*fseMetadata).ofType, ofCodeTable, - MaxOff as core::ffi::c_uint, + MaxOff, nbSeq, ((*fseTables).offcodeCTable).as_ptr(), core::ptr::null(), OF_defaultNorm.as_ptr(), OF_defaultNormLog, - DefaultMaxOff as u32, + DefaultMaxOff, workspace, wkspSize, )); cSeqSizeEstimate = cSeqSizeEstimate.wrapping_add(ZSTD_estimateSubBlockSize_symbolType( (*fseMetadata).llType, llCodeTable, - MaxLL as core::ffi::c_uint, + MaxLL, nbSeq, ((*fseTables).litlengthCTable).as_ptr(), LL_bits.as_ptr(), LL_defaultNorm.as_ptr(), LL_defaultNormLog, - MaxLL as u32, + MaxLL, workspace, wkspSize, )); cSeqSizeEstimate = cSeqSizeEstimate.wrapping_add(ZSTD_estimateSubBlockSize_symbolType( (*fseMetadata).mlType, mlCodeTable, - MaxML as core::ffi::c_uint, + MaxML, nbSeq, ((*fseTables).matchlengthCTable).as_ptr(), ML_bits.as_ptr(), ML_defaultNorm.as_ptr(), ML_defaultNormLog, - MaxML as u32, + MaxML, workspace, wkspSize, )); diff --git a/lib/compress/zstd_opt.rs b/lib/compress/zstd_opt.rs index 7d834c08..89aebe42 100644 --- a/lib/compress/zstd_opt.rs +++ b/lib/compress/zstd_opt.rs @@ -241,7 +241,7 @@ unsafe fn ZSTD_rescaleFreqs( let mut lit: core::ffi::c_uint = 0; (*optPtr).litSum = 0; lit = 0; - while lit <= MaxLit as core::ffi::c_uint { + while lit <= MaxLit { let scaleLog = 11u32; let bitCost = HUF_getNbBitsFromCTable( ((*(*optPtr).symbolCosts).huf.CTable).as_ptr(), @@ -272,7 +272,7 @@ unsafe fn ZSTD_rescaleFreqs( ); (*optPtr).litLengthSum = 0; ll = 0; - while ll <= MaxLL as core::ffi::c_uint { + while ll <= MaxLL { let scaleLog_0 = 10u32; let bitCost_0 = FSE_getMaxNbBits(llstate.symbolTT, ll); *((*optPtr).litLengthFreq).offset(ll as isize) = (if bitCost_0 != 0 { @@ -299,7 +299,7 @@ unsafe fn ZSTD_rescaleFreqs( ); (*optPtr).matchLengthSum = 0; ml = 0; - while ml <= MaxML as core::ffi::c_uint { + while ml <= MaxML { let scaleLog_1 = 10u32; let bitCost_1 = FSE_getMaxNbBits(mlstate.symbolTT, ml); *((*optPtr).matchLengthFreq).offset(ml as 
isize) = (if bitCost_1 != 0 { @@ -326,7 +326,7 @@ unsafe fn ZSTD_rescaleFreqs( ); (*optPtr).offCodeSum = 0; of = 0; - while of <= MaxOff as core::ffi::c_uint { + while of <= MaxOff { let scaleLog_2 = 10u32; let bitCost_2 = FSE_getMaxNbBits(ofstate.symbolTT, of); *((*optPtr).offCodeFreq).offset(of as isize) = (if bitCost_2 != 0 { @@ -337,12 +337,12 @@ unsafe fn ZSTD_rescaleFreqs( as core::ffi::c_uint; (*optPtr).offCodeSum = ((*optPtr).offCodeSum as core::ffi::c_uint) .wrapping_add(*((*optPtr).offCodeFreq).offset(of as isize)) - as u32 as u32; + as u32; of = of.wrapping_add(1); } } else { if compressedLiterals != 0 { - let mut lit_0 = MaxLit as core::ffi::c_uint; + let mut lit_0 = MaxLit; HIST_count_simple( (*optPtr).litFreq, &mut lit_0, @@ -350,7 +350,7 @@ unsafe fn ZSTD_rescaleFreqs( srcSize, ); (*optPtr).litSum = - ZSTD_downscaleStats((*optPtr).litFreq, MaxLit as u32, 8, base_0possible); + ZSTD_downscaleStats((*optPtr).litFreq, MaxLit, 8, base_0possible); } let baseLLfreqs: [core::ffi::c_uint; 36] = [ 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -365,11 +365,11 @@ unsafe fn ZSTD_rescaleFreqs( (*optPtr).litLengthSum = sum_u32(baseLLfreqs.as_ptr(), (MaxLL + 1) as size_t); let mut ml_0: core::ffi::c_uint = 0; ml_0 = 0; - while ml_0 <= MaxML as core::ffi::c_uint { + while ml_0 <= MaxML { *((*optPtr).matchLengthFreq).offset(ml_0 as isize) = 1; ml_0 = ml_0.wrapping_add(1); } - (*optPtr).matchLengthSum = (MaxML + 1) as u32; + (*optPtr).matchLengthSum = MaxML + 1; let baseOFCfreqs: [core::ffi::c_uint; 32] = [ 6, 2, 1, 1, 2, 3, 4, 4, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -384,11 +384,11 @@ unsafe fn ZSTD_rescaleFreqs( } } else { if compressedLiterals != 0 { - (*optPtr).litSum = ZSTD_scaleStats((*optPtr).litFreq, MaxLit as u32, 12); + (*optPtr).litSum = ZSTD_scaleStats((*optPtr).litFreq, MaxLit, 12); } - (*optPtr).litLengthSum = ZSTD_scaleStats((*optPtr).litLengthFreq, MaxLL as u32, 11); - 
(*optPtr).matchLengthSum = ZSTD_scaleStats((*optPtr).matchLengthFreq, MaxML as u32, 11); - (*optPtr).offCodeSum = ZSTD_scaleStats((*optPtr).offCodeFreq, MaxOff as u32, 11); + (*optPtr).litLengthSum = ZSTD_scaleStats((*optPtr).litLengthFreq, MaxLL, 11); + (*optPtr).matchLengthSum = ZSTD_scaleStats((*optPtr).matchLengthFreq, MaxML, 11); + (*optPtr).offCodeSum = ZSTD_scaleStats((*optPtr).offCodeFreq, MaxOff, 11); } ZSTD_setBasePrices(optPtr, optLevel); } diff --git a/lib/decompress/zstd_decompress.rs b/lib/decompress/zstd_decompress.rs index 0d848420..67a67336 100644 --- a/lib/decompress/zstd_decompress.rs +++ b/lib/decompress/zstd_decompress.rs @@ -2319,7 +2319,7 @@ pub fn ZSTD_loadDEntropy(entropy: &mut ZSTD_entropyDTables_t, dict: &[u8]) -> si dictPtr = &dictPtr[hSize..]; let mut offcodeNCount: [core::ffi::c_short; 32] = [0; 32]; - let mut offcodeMaxValue = MaxOff as core::ffi::c_uint; + let mut offcodeMaxValue = MaxOff; let mut offcodeLog: core::ffi::c_uint = 0; let offcodeHeaderSize = FSE_readNCount_slice( &mut offcodeNCount, @@ -2348,7 +2348,7 @@ pub fn ZSTD_loadDEntropy(entropy: &mut ZSTD_entropyDTables_t, dict: &[u8]) -> si ); dictPtr = &dictPtr[offcodeHeaderSize..]; let mut matchlengthNCount: [core::ffi::c_short; 53] = [0; 53]; - let mut matchlengthMaxValue = MaxML as core::ffi::c_uint; + let mut matchlengthMaxValue = MaxML; let mut matchlengthLog: core::ffi::c_uint = 0; let matchlengthHeaderSize = FSE_readNCount_slice( &mut matchlengthNCount, @@ -2376,7 +2376,7 @@ pub fn ZSTD_loadDEntropy(entropy: &mut ZSTD_entropyDTables_t, dict: &[u8]) -> si ); dictPtr = &dictPtr[matchlengthHeaderSize..]; let mut litlengthNCount: [core::ffi::c_short; 36] = [0; 36]; - let mut litlengthMaxValue = MaxLL as core::ffi::c_uint; + let mut litlengthMaxValue = MaxLL; let mut litlengthLog: core::ffi::c_uint = 0; let litlengthHeaderSize = FSE_readNCount_slice( &mut litlengthNCount, diff --git a/lib/decompress/zstd_decompress_block.rs b/lib/decompress/zstd_decompress_block.rs index 
276403a8..89152be7 100644 --- a/lib/decompress/zstd_decompress_block.rs +++ b/lib/decompress/zstd_decompress_block.rs @@ -984,8 +984,8 @@ fn ZSTD_decodeSeqHeaders( &mut dctx.entropy.LLTable, &mut dctx.LLTptr, LLtype, - MaxLL as core::ffi::c_uint, - LLFSELog as u32, + MaxLL, + LLFSELog, &src[ip..], &LL_base, &LL_bits, @@ -1002,8 +1002,8 @@ fn ZSTD_decodeSeqHeaders( &mut dctx.entropy.OFTable, &mut dctx.OFTptr, OFtype, - MaxOff as core::ffi::c_uint, - OffFSELog as u32, + MaxOff, + OffFSELog, &src[ip..], &OF_base, &OF_bits, @@ -1020,8 +1020,8 @@ fn ZSTD_decodeSeqHeaders( &mut dctx.entropy.MLTable, &mut dctx.MLTptr, MLtype, - MaxML as core::ffi::c_uint, - MLFSELog as u32, + MaxML, + MLFSELog, &src[ip..], &ML_base, &ML_bits, @@ -1602,7 +1602,7 @@ fn ZSTD_decodeSequence( assert!(llBits <= MaxLLBits); assert!(mlBits <= MaxMLBits); - assert!(ofBits as core::ffi::c_int <= MaxOff); + assert!(ofBits as u32 <= MaxOff); let mut offset: size_t = 0; if ofBits > 1 { @@ -1682,7 +1682,7 @@ fn ZSTD_decodeSequence( } // Ensure there are enough bits to read the rest of data in 64-bit mode. - const { assert!(16 + LLFSELog + MLFSELog + OffFSELog < STREAM_ACCUMULATOR_MIN_64) }; + const { assert!(16 + LLFSELog + MLFSELog + OffFSELog < STREAM_ACCUMULATOR_MIN_64 as u32) }; if llBits > 0 { seq.litLength = (seq.litLength) @@ -2300,7 +2300,7 @@ impl SymbolTable { info.longOffsetShare += 1; } } - info.longOffsetShare <<= (OffFSELog as u32).wrapping_sub(tableLog); + info.longOffsetShare <<= OffFSELog.wrapping_sub(tableLog); info } diff --git a/lib/dictBuilder/cover.rs b/lib/dictBuilder/cover.rs index 2f4e21b6..55ae86d0 100644 --- a/lib/dictBuilder/cover.rs +++ b/lib/dictBuilder/cover.rs @@ -1257,7 +1257,9 @@ fn COVER_tryParameters(data: Box) { /// of [`core::slice::from_raw_parts`]. /// - `sum(samplesSizes)` is 0 or `samplesBuffer` and `sum(samplesSizes)` satisfy the requirements /// of [`core::slice::from_raw_parts`]. 
-/// - `parameters` satisfies the requirements of `pointer::as_mut` +/// - `parameters` satisfies the requirements of [`pointer::as_mut`] +/// +/// [`pointer::as_mut`]: https://doc.rust-lang.org/stable/core/primitive.pointer.html#method.as_mut #[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(ZDICT_optimizeTrainFromBuffer_cover))] pub unsafe extern "C" fn ZDICT_optimizeTrainFromBuffer_cover( dictBuffer: *mut core::ffi::c_void, diff --git a/lib/dictBuilder/fastcover.rs b/lib/dictBuilder/fastcover.rs index b5322964..7a60d208 100644 --- a/lib/dictBuilder/fastcover.rs +++ b/lib/dictBuilder/fastcover.rs @@ -665,7 +665,9 @@ fn train_from_buffer_fastcover( /// of [`core::slice::from_raw_parts`]. /// - `sum(samplesSizes)` is 0 or `samplesBuffer` and `sum(samplesSizes)` satisfy the requirements /// of [`core::slice::from_raw_parts`]. -/// - `parameters` satisfies the requirements of `pointer::as_mut` +/// - `parameters` satisfies the requirements of [`pointer::as_mut`] +/// +/// [`pointer::as_mut`]: https://doc.rust-lang.org/stable/core/primitive.pointer.html#method.as_mut #[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(ZDICT_optimizeTrainFromBuffer_fastCover))] pub unsafe extern "C" fn ZDICT_optimizeTrainFromBuffer_fastCover( dictBuffer: *mut core::ffi::c_void, diff --git a/lib/dictBuilder/zdict.rs b/lib/dictBuilder/zdict.rs index abc3e083..b02403bb 100644 --- a/lib/dictBuilder/zdict.rs +++ b/lib/dictBuilder/zdict.rs @@ -5,7 +5,7 @@ use libc::{free, malloc, memcpy, size_t}; use crate::lib::common::bits::{ZSTD_NbCommonBytes, ZSTD_highbit32}; use crate::lib::common::error_private::{ERR_getErrorName, ERR_isError, Error}; -use crate::lib::common::huf::{HUF_CElt, HUF_WORKSPACE_SIZE}; +use crate::lib::common::huf::{HUF_CElt, HUF_CTABLE_WORKSPACE_SIZE_U32, HUF_WORKSPACE_SIZE}; use crate::lib::common::mem::{MEM_read16, MEM_read64, MEM_readLE32, MEM_readST, MEM_writeLE32}; use crate::lib::common::xxhash::ZSTD_XXH64; use 
crate::lib::common::zstd_internal::{ @@ -124,15 +124,11 @@ unsafe fn ZDICT_count( loop { let diff = MEM_readST(pMatch) ^ MEM_readST(pIn); if diff == 0 { - pIn = (pIn as *const core::ffi::c_char).add(::core::mem::size_of::()) - as *const core::ffi::c_void; - pMatch = (pMatch as *const core::ffi::c_char).add(::core::mem::size_of::()) - as *const core::ffi::c_void; + pIn = pIn.byte_add(::core::mem::size_of::()); + pMatch = pMatch.byte_add(::core::mem::size_of::()); } else { - pIn = (pIn as *const core::ffi::c_char).offset(ZSTD_NbCommonBytes(diff) as isize) - as *const core::ffi::c_void; - return (pIn as *const core::ffi::c_char).offset_from(pStart) as core::ffi::c_long - as size_t; + pIn = pIn.byte_offset(ZSTD_NbCommonBytes(diff) as isize); + return pIn.byte_offset_from(pStart) as core::ffi::c_long as size_t; } } } @@ -728,7 +724,7 @@ fn fill_noise(buffer: &mut [u8]) { } } -const MAXREPOFFSET: core::ffi::c_int = 1024; +const MAXREPOFFSET: u32 = 1024; unsafe fn ZDICT_countEStats( esr: EStats_ress_t, params: &ZSTD_parameters, @@ -802,10 +798,10 @@ unsafe fn ZDICT_countEStats( let seq: *const SeqDef = (*seqStorePtr).sequencesStart; let mut offset1 = ((*seq).offBase).wrapping_sub(ZSTD_REP_NUM as u32); let mut offset2 = ((*seq.add(1)).offBase).wrapping_sub(ZSTD_REP_NUM as u32); - if offset1 >= MAXREPOFFSET as u32 { + if offset1 >= MAXREPOFFSET { offset1 = 0; } - if offset2 >= MAXREPOFFSET as u32 { + if offset2 >= MAXREPOFFSET { offset2 = 0; } repOffsets[offset1 as usize] += 3; @@ -814,17 +810,6 @@ unsafe fn ZDICT_countEStats( } } -unsafe fn ZDICT_totalSampleSize(fileSizes: *const size_t, nbFiles: core::ffi::c_uint) -> size_t { - let mut total = 0 as size_t; - let mut u: core::ffi::c_uint = 0; - u = 0; - while u < nbFiles { - total = total.wrapping_add(*fileSizes.offset(u as isize)); - u = u.wrapping_add(1); - } - total -} - fn ZDICT_insertSortCount( table: &mut [offsetCount_t; ZSTD_REP_NUM as usize + 1], val: u32, @@ -847,326 +832,309 @@ fn ZDICT_flatLit(countLit: &mut 
[core::ffi::c_uint; 256]) { countLit[254] = 1; } -const OFFCODE_MAX: core::ffi::c_int = 30; +const OFFCODE_MAX: u32 = 30; unsafe fn ZDICT_analyzeEntropy( dstBuffer: *mut core::ffi::c_void, - mut maxDstSize: size_t, - mut compressionLevel: core::ffi::c_int, + maxDstSize: size_t, + compressionLevel: core::ffi::c_int, srcBuffer: *const core::ffi::c_void, - fileSizes: *const size_t, - nbFiles: core::ffi::c_uint, + fileSizes: &[usize], dictBuffer: *const core::ffi::c_void, dictBufferSize: size_t, notificationLevel: core::ffi::c_uint, -) -> size_t { - let mut hufTable: [HUF_CElt; 257] = [0; 257]; - let mut offcodeNCount: [core::ffi::c_short; 31] = [0; 31]; - let offcodeMax = - ZSTD_highbit32(dictBufferSize.wrapping_add((128 * ((1) << 10)) as size_t) as u32); - let mut matchLengthNCount: [core::ffi::c_short; 53] = [0; 53]; - let mut litLengthNCount: [core::ffi::c_short; 36] = [0; 36]; +) -> Result { let mut esr = EStats_ress_t { dict: core::ptr::null_mut(), zc: core::ptr::null_mut(), workPlace: core::ptr::null_mut(), }; - let mut params = ZSTD_parameters::default(); - let mut huffLog = 11; - let mut Offlog = OffFSELog as u32; - let mut mlLog = MLFSELog as u32; - let mut llLog = LLFSELog as u32; - let mut total: u32 = 0; - let mut pos = 0 as size_t; - let mut errorCode: size_t = 0; - let mut eSize = 0; - let totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles); - let averageSampleSize = totalSrcSize - / nbFiles.wrapping_add((nbFiles == 0) as core::ffi::c_int as core::ffi::c_uint) as size_t; - let mut dstPtr = dstBuffer as *mut u8; - let mut wksp: [u32; 1216] = [0; 1216]; - if offcodeMax > OFFCODE_MAX as u32 { - eSize = Error::dictionaryCreation_failed.to_error_code(); - } else { - let mut countLit = [1u32; 256]; - let mut offcodeCount = [1u32; 31]; - let mut matchLengthCount = [1u32; 53]; - let mut litLengthCount = [1u32; 36]; - let mut repOffset: [u32; 1024] = [0; 1024]; - repOffset[1] = 1; - repOffset[4] = 1; - repOffset[8] = 1; + let eSize = 
analyze_entropy_internal( + dstBuffer as *mut u8, + maxDstSize, + compressionLevel, + srcBuffer, + fileSizes, + dictBuffer, + dictBufferSize, + notificationLevel, + &mut esr, + ); - let mut bestRepOffset = [offsetCount_t::default(); ZSTD_REP_NUM as usize + 1]; + ZSTD_freeCDict(esr.dict); + ZSTD_freeCCtx(esr.zc); + free(esr.workPlace); + + eSize +} - if compressionLevel == 0 { - compressionLevel = ZSTD_CLEVEL_DEFAULT; +unsafe fn analyze_entropy_internal( + mut dstPtr: *mut u8, + mut maxDstSize: size_t, + mut compressionLevel: core::ffi::c_int, + srcBuffer: *const core::ffi::c_void, + fileSizes: &[usize], + dictBuffer: *const core::ffi::c_void, + dictBufferSize: size_t, + notificationLevel: core::ffi::c_uint, + esr: &mut EStats_ress_t, +) -> Result { + let mut hufTable: [HUF_CElt; 257] = [0; 257]; + + const KB: usize = 1 << 10; + let offcodeMax = ZSTD_highbit32(dictBufferSize.wrapping_add(128 * KB) as u32); + if offcodeMax > OFFCODE_MAX { + return Err(Error::dictionaryCreation_failed); + } + + let mut offcodeNCount = [0i16; OFFCODE_MAX as usize + 1]; + let mut matchLengthNCount = [0i16; MaxML as usize + 1]; + let mut litLengthNCount = [0i16; MaxLL as usize + 1]; + + let mut countLit = [1u32; 256]; + let mut offcodeCount = [1u32; OFFCODE_MAX as usize + 1]; + let mut matchLengthCount = [1u32; MaxML as usize + 1]; + let mut litLengthCount = [1u32; MaxLL as usize + 1]; + + let mut repOffset = [0; MAXREPOFFSET as usize]; + repOffset[1] = 1; + repOffset[4] = 1; + repOffset[8] = 1; + + let mut bestRepOffset = [offsetCount_t::default(); ZSTD_REP_NUM as usize + 1]; + + let averageSampleSize = fileSizes + .iter() + .sum::() + .checked_div(fileSizes.len()) + .unwrap_or(0); + if compressionLevel == 0 { + compressionLevel = ZSTD_CLEVEL_DEFAULT; + } + let params = ZSTD_getParams( + compressionLevel, + averageSampleSize as core::ffi::c_ulonglong, + dictBufferSize, + ); + esr.dict = ZSTD_createCDict_advanced( + dictBuffer, + dictBufferSize, + ZSTD_dlm_byRef, + ZSTD_dct_rawContent, + 
params.cParams, + ZSTD_customMem::default(), + ); + esr.zc = ZSTD_createCCtx(); + esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX as size_t); + if (esr.dict).is_null() || (esr.zc).is_null() || (esr.workPlace).is_null() { + if notificationLevel >= 1 { + eprintln!("Not enough memory"); } - params = ZSTD_getParams( - compressionLevel, - averageSampleSize as core::ffi::c_ulonglong, - dictBufferSize, + return Err(Error::memory_allocation); + } + + // collect stats on all samples + let mut pos = 0usize; + for fileSize in fileSizes { + ZDICT_countEStats( + *esr, + ¶ms, + &mut countLit, + &mut offcodeCount, + &mut matchLengthCount, + &mut litLengthCount, + &mut repOffset, + srcBuffer.byte_add(pos), + *fileSize, + notificationLevel, ); - esr.dict = ZSTD_createCDict_advanced( - dictBuffer, - dictBufferSize, - ZSTD_dlm_byRef, - ZSTD_dct_rawContent, - params.cParams, - ZSTD_customMem::default(), + pos = pos.wrapping_add(*fileSize); + } + if notificationLevel >= 4 { + eprintln!("Offset Code Frequencies :"); + for (i, count) in offcodeCount.iter().enumerate() { + eprintln!("{:>2} :{:>7} ", i, count); + } + } + + // analyze, build stats, starting with literals + let mut wksp = [0u32; HUF_CTABLE_WORKSPACE_SIZE_U32]; + let huffLog = 11; + let mut maxNbBits = HUF_buildCTable_wksp( + hufTable.as_mut_ptr(), + countLit.as_mut_ptr(), + 255, + huffLog, + wksp.as_mut_ptr() as *mut core::ffi::c_void, + ::core::mem::size_of::<[u32; HUF_CTABLE_WORKSPACE_SIZE_U32]>(), + ); + if let Some(err) = Error::from_error_code(maxNbBits) { + if notificationLevel >= 1 { + eprintln!(" HUF_buildCTable error"); + } + return Err(err); + } + if maxNbBits == 8 { + if notificationLevel >= 2 { + eprintln!("warning : pathological dataset : literals are not compressible : samples are noisy or too regular "); + } + ZDICT_flatLit(&mut countLit); + maxNbBits = HUF_buildCTable_wksp( + hufTable.as_mut_ptr(), + countLit.as_mut_ptr(), + 255, + huffLog, + wksp.as_mut_ptr() as *mut core::ffi::c_void, + 
::core::mem::size_of::<[u32; HUF_CTABLE_WORKSPACE_SIZE_U32]>(), ); - esr.zc = ZSTD_createCCtx(); - esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX as size_t); - if (esr.dict).is_null() || (esr.zc).is_null() || (esr.workPlace).is_null() { - eSize = Error::memory_allocation.to_error_code(); - if notificationLevel >= 1 { - eprintln!("Not enough memory"); - } - } else { - let mut u = 0; - while u < nbFiles { - ZDICT_countEStats( - esr, - ¶ms, - &mut countLit, - &mut offcodeCount, - &mut matchLengthCount, - &mut litLengthCount, - &mut repOffset, - (srcBuffer as *const core::ffi::c_char).add(pos) as *const core::ffi::c_void, - *fileSizes.offset(u as isize), - notificationLevel, - ); - pos = pos.wrapping_add(*fileSizes.offset(u as isize)); - u = u.wrapping_add(1); - } - if notificationLevel >= 4 { - if notificationLevel >= 4 { - eprintln!("Offset Code Frequencies :"); - } - u = 0; - while u <= offcodeMax { - if notificationLevel >= 4 { - eprintln!( - "{:>2} :{:>7} ", - u, - *offcodeCount.as_mut_ptr().offset(u as isize), - ); - } - u = u.wrapping_add(1); - } - } - let mut maxNbBits = HUF_buildCTable_wksp( - hufTable.as_mut_ptr(), - countLit.as_mut_ptr(), - 255, - huffLog, - wksp.as_mut_ptr() as *mut core::ffi::c_void, - ::core::mem::size_of::<[u32; 1216]>(), - ); - if ERR_isError(maxNbBits) { - eSize = maxNbBits; - if notificationLevel >= 1 { - eprintln!(" HUF_buildCTable error"); - } - } else { - if maxNbBits == 8 { - if notificationLevel >= 2 { - eprintln!( - "warning : pathological dataset : literals are not compressible : samples are noisy or too regular " - ); - } - ZDICT_flatLit(&mut countLit); - maxNbBits = HUF_buildCTable_wksp( - hufTable.as_mut_ptr(), - countLit.as_mut_ptr(), - 255, - huffLog, - wksp.as_mut_ptr() as *mut core::ffi::c_void, - ::core::mem::size_of::<[u32; 1216]>(), - ); - } - huffLog = maxNbBits as u32; - let mut offset: u32 = 0; - offset = 1; - while offset < MAXREPOFFSET as u32 { - ZDICT_insertSortCount(&mut bestRepOffset, offset, repOffset[offset as 
usize]); - offset = offset.wrapping_add(1); - } - total = 0; - u = 0; - while u <= offcodeMax { - total = (total as core::ffi::c_uint) - .wrapping_add(*offcodeCount.as_mut_ptr().offset(u as isize)); - u = u.wrapping_add(1); - } - errorCode = FSE_normalizeCount( - offcodeNCount.as_mut_ptr(), - Offlog, - offcodeCount.as_mut_ptr(), - total as size_t, - offcodeMax, - 1, - ); - if ERR_isError(errorCode) { - eSize = errorCode; - if notificationLevel >= 1 { - eprintln!("FSE_normalizeCount error with offcodeCount"); - } - } else { - Offlog = errorCode as u32; - total = 0; - u = 0; - while u <= MaxML as u32 { - total = (total as core::ffi::c_uint) - .wrapping_add(*matchLengthCount.as_mut_ptr().offset(u as isize)); - u = u.wrapping_add(1); - } - errorCode = FSE_normalizeCount( - matchLengthNCount.as_mut_ptr(), - mlLog, - matchLengthCount.as_mut_ptr(), - total as size_t, - MaxML as core::ffi::c_uint, - 1, - ); - if ERR_isError(errorCode) { - eSize = errorCode; - if notificationLevel >= 1 { - eprintln!("FSE_normalizeCount error with matchLengthCount"); - } - } else { - mlLog = errorCode as u32; - total = 0; - u = 0; - while u <= MaxLL as u32 { - total = (total as core::ffi::c_uint) - .wrapping_add(*litLengthCount.as_mut_ptr().offset(u as isize)); - u = u.wrapping_add(1); - } - errorCode = FSE_normalizeCount( - litLengthNCount.as_mut_ptr(), - llLog, - litLengthCount.as_mut_ptr(), - total as size_t, - MaxLL as core::ffi::c_uint, - 1, - ); - if ERR_isError(errorCode) { - eSize = errorCode; - if notificationLevel >= 1 { - eprintln!("FSE_normalizeCount error with litLengthCount"); - } - } else { - llLog = errorCode as u32; - let hhSize = HUF_writeCTable_wksp( - dstPtr as *mut core::ffi::c_void, - maxDstSize, - hufTable.as_mut_ptr(), - 255, - huffLog, - wksp.as_mut_ptr() as *mut core::ffi::c_void, - ::core::mem::size_of::<[u32; 1216]>(), - ); - if ERR_isError(hhSize) { - eSize = hhSize; - if notificationLevel >= 1 { - eprintln!("HUF_writeCTable error"); - } - } else { - dstPtr = 
dstPtr.add(hhSize); - maxDstSize = maxDstSize.wrapping_sub(hhSize); - eSize = eSize.wrapping_add(hhSize); - let ohSize = FSE_writeNCount( - dstPtr as *mut core::ffi::c_void, - maxDstSize, - offcodeNCount.as_mut_ptr(), - OFFCODE_MAX as core::ffi::c_uint, - Offlog, - ); - if ERR_isError(ohSize) { - eSize = ohSize; - if notificationLevel >= 1 { - eprintln!("FSE_writeNCount error with offcodeNCount"); - } - } else { - dstPtr = dstPtr.add(ohSize); - maxDstSize = maxDstSize.wrapping_sub(ohSize); - eSize = eSize.wrapping_add(ohSize); - let mhSize = FSE_writeNCount( - dstPtr as *mut core::ffi::c_void, - maxDstSize, - matchLengthNCount.as_mut_ptr(), - MaxML as core::ffi::c_uint, - mlLog, - ); - if ERR_isError(mhSize) { - eSize = mhSize; - if notificationLevel >= 1 { - eprintln!( - "FSE_writeNCount error with matchLengthNCount " - ); - } - } else { - dstPtr = dstPtr.add(mhSize); - maxDstSize = maxDstSize.wrapping_sub(mhSize); - eSize = eSize.wrapping_add(mhSize); - let lhSize = FSE_writeNCount( - dstPtr as *mut core::ffi::c_void, - maxDstSize, - litLengthNCount.as_mut_ptr(), - MaxLL as core::ffi::c_uint, - llLog, - ); - if ERR_isError(lhSize) { - eSize = lhSize; - if notificationLevel >= 1 { - eprintln!( - "FSE_writeNCount error with litlengthNCount " - ); - } - } else { - dstPtr = dstPtr.add(lhSize); - maxDstSize = maxDstSize.wrapping_sub(lhSize); - eSize = eSize.wrapping_add(lhSize); - if maxDstSize < 12 { - eSize = -(ZSTD_error_dstSize_tooSmall - as core::ffi::c_int) - as size_t; - if notificationLevel >= 1 { - eprintln!( - "not enough space to write RepOffsets " - ); - } - } else { - MEM_writeLE32( - dstPtr as *mut core::ffi::c_void, - *repStartValue.as_ptr(), - ); - MEM_writeLE32( - dstPtr.add(4) as *mut core::ffi::c_void, - *repStartValue.as_ptr().add(1), - ); - MEM_writeLE32( - dstPtr.add(8) as *mut core::ffi::c_void, - *repStartValue.as_ptr().add(2), - ); - eSize = eSize.wrapping_add(12); - } - } - } - } - } - } - } - } - } + } + let huffLog = maxNbBits as u32; + + 
// look for most common first offsets + for offset in 1..MAXREPOFFSET { + ZDICT_insertSortCount(&mut bestRepOffset, offset, repOffset[offset as usize]); + } + + let total: u32 = offcodeCount[..offcodeMax as usize + 1].iter().sum(); + let errorCode = FSE_normalizeCount( + offcodeNCount.as_mut_ptr(), + OffFSELog, + offcodeCount.as_mut_ptr(), + total as size_t, + offcodeMax, + 1, + ); + if let Some(err) = Error::from_error_code(errorCode) { + if notificationLevel >= 1 { + eprintln!("FSE_normalizeCount error with offcodeCount"); } + return Err(err); } - ZSTD_freeCDict(esr.dict); - ZSTD_freeCCtx(esr.zc); - free(esr.workPlace); - eSize + let offLog = errorCode as u32; + + let total: u32 = matchLengthCount.iter().sum(); + let errorCode = FSE_normalizeCount( + matchLengthNCount.as_mut_ptr(), + MLFSELog, + matchLengthCount.as_mut_ptr(), + total as size_t, + MaxML, + 1, + ); + if let Some(err) = Error::from_error_code(errorCode) { + if notificationLevel >= 1 { + eprintln!("FSE_normalizeCount error with matchLengthCount"); + } + return Err(err); + } + let mlLog = errorCode as u32; + + let total: u32 = litLengthCount.iter().sum(); + let errorCode = FSE_normalizeCount( + litLengthNCount.as_mut_ptr(), + LLFSELog, + litLengthCount.as_mut_ptr(), + total as size_t, + MaxLL, + 1, + ); + if let Some(err) = Error::from_error_code(errorCode) { + if notificationLevel >= 1 { + eprintln!("FSE_normalizeCount error with litLengthCount"); + } + return Err(err); + } + let llLog = errorCode as u32; + + // write result to buffer + let hhSize = HUF_writeCTable_wksp( + dstPtr as *mut core::ffi::c_void, + maxDstSize, + hufTable.as_mut_ptr(), + 255, + huffLog, + wksp.as_mut_ptr() as *mut core::ffi::c_void, + ::core::mem::size_of::<[u32; HUF_CTABLE_WORKSPACE_SIZE_U32]>(), + ); + if let Some(err) = Error::from_error_code(hhSize) { + if notificationLevel >= 1 { + eprintln!("HUF_writeCTable error"); + } + return Err(err); + } + dstPtr = dstPtr.add(hhSize); + maxDstSize = 
maxDstSize.wrapping_sub(hhSize); + let mut eSize = hhSize; + + let ohSize = FSE_writeNCount( + dstPtr as *mut core::ffi::c_void, + maxDstSize, + offcodeNCount.as_mut_ptr(), + OFFCODE_MAX, + offLog, + ); + if let Some(err) = Error::from_error_code(ohSize) { + if notificationLevel >= 1 { + eprintln!("FSE_writeNCount error with offcodeNCount"); + } + return Err(err); + } + dstPtr = dstPtr.add(ohSize); + maxDstSize = maxDstSize.wrapping_sub(ohSize); + eSize = eSize.wrapping_add(ohSize); + + let mhSize = FSE_writeNCount( + dstPtr as *mut core::ffi::c_void, + maxDstSize, + matchLengthNCount.as_mut_ptr(), + MaxML, + mlLog, + ); + if let Some(err) = Error::from_error_code(mhSize) { + if notificationLevel >= 1 { + eprintln!("FSE_writeNCount error with matchLengthNCount "); + } + return Err(err); + } + dstPtr = dstPtr.add(mhSize); + maxDstSize = maxDstSize.wrapping_sub(mhSize); + eSize = eSize.wrapping_add(mhSize); + + let lhSize = FSE_writeNCount( + dstPtr as *mut core::ffi::c_void, + maxDstSize, + litLengthNCount.as_mut_ptr(), + MaxLL, + llLog, + ); + if let Some(err) = Error::from_error_code(lhSize) { + if notificationLevel >= 1 { + eprintln!("FSE_writeNCount error with litlengthNCount "); + } + return Err(err); + } + dstPtr = dstPtr.add(lhSize); + maxDstSize = maxDstSize.wrapping_sub(lhSize); + eSize = eSize.wrapping_add(lhSize); + + if maxDstSize < 12 { + if notificationLevel >= 1 { + eprintln!("not enough space to write RepOffsets "); + } + return Err(Error::dstSize_tooSmall); + } + + MEM_writeLE32(dstPtr as *mut core::ffi::c_void, *repStartValue.as_ptr()); + MEM_writeLE32( + dstPtr.add(4) as *mut core::ffi::c_void, + *repStartValue.as_ptr().add(1), + ); + MEM_writeLE32( + dstPtr.add(8) as *mut core::ffi::c_void, + *repStartValue.as_ptr().add(2), + ); + + Ok(eSize.wrapping_add(12)) } #[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(ZDICT_finalizeDictionary))] @@ -1180,6 +1148,12 @@ pub unsafe extern "C" fn ZDICT_finalizeDictionary( nbSamples: 
core::ffi::c_uint, params: ZDICT_params_t, ) -> size_t { + let samplesSizes = if samplesSizes.is_null() || nbSamples == 0 { + &[] + } else { + core::slice::from_raw_parts(samplesSizes, nbSamples as usize) + }; + finalize_dictionary( dictBuffer, dictBufferCapacity, @@ -1187,7 +1161,6 @@ dictContentSize, samplesBuffer, samplesSizes, - nbSamples, params, ) .map_err(|e| e.to_error_code()) @@ -1200,8 +1173,7 @@ unsafe fn finalize_dictionary( customDictContent: *const core::ffi::c_void, mut dictContentSize: size_t, samplesBuffer: *const core::ffi::c_void, - samplesSizes: *const size_t, - nbSamples: core::ffi::c_uint, + samplesSizes: &[usize], params: ZDICT_params_t, ) -> Result<size_t, Error> { let mut hSize: size_t = 0; @@ -1243,14 +1215,10 @@ compressionLevel, samplesBuffer, samplesSizes, - nbSamples, customDictContent, dictContentSize, notificationLevel, - ); - if let Some(err) = Error::from_error_code(eSize) { - return Err(err); - } + )?; hSize = hSize.wrapping_add(eSize); if hSize.wrapping_add(dictContentSize) > dictBufferCapacity { dictContentSize = dictBufferCapacity.wrapping_sub(hSize); @@ -1285,8 +1253,7 @@ dictContentSize: size_t, dictBufferCapacity: size_t, samplesBuffer: *const core::ffi::c_void, - samplesSizes: *const size_t, - nbSamples: core::ffi::c_uint, + samplesSizes: &[usize], params: ZDICT_params_t, ) -> size_t { let compressionLevel = if params.compressionLevel == 0 { @@ -1302,28 +1269,27 @@ if notificationLevel >= 2 { eprintln!("statistics ..."); } - let eSize = ZDICT_analyzeEntropy( - (dictBuffer as *mut core::ffi::c_char).add(hSize) as *mut core::ffi::c_void, + let res = ZDICT_analyzeEntropy( + dictBuffer.byte_add(hSize), dictBufferCapacity.wrapping_sub(hSize), compressionLevel, samplesBuffer, samplesSizes, - nbSamples, - (dictBuffer as *mut core::ffi::c_char) -
.add(dictBufferCapacity) - .offset(-(dictContentSize as isize)) as *const core::ffi::c_void, + dictBuffer + .byte_add(dictBufferCapacity) + .byte_offset(-(dictContentSize as isize)), dictContentSize, notificationLevel, ); - if ZDICT_isError(eSize) != 0 { - return eSize; - } - hSize = hSize.wrapping_add(eSize); + match res { + Ok(eSize) => hSize = hSize.wrapping_add(eSize), + Err(err) => return err.to_error_code(), + }; MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY); let randomID = ZSTD_XXH64( - (dictBuffer as *mut core::ffi::c_char) - .add(dictBufferCapacity) - .offset(-(dictContentSize as isize)) as *const core::ffi::c_void, + dictBuffer + .byte_add(dictBufferCapacity) + .byte_offset(-(dictContentSize as isize)), dictContentSize, 0, ); @@ -1334,10 +1300,7 @@ unsafe fn ZDICT_addEntropyTablesFromBuffer_advanced( } else { compliantID }; - MEM_writeLE32( - (dictBuffer as *mut core::ffi::c_char).add(4) as *mut core::ffi::c_void, - dictID, - ); + MEM_writeLE32(dictBuffer.byte_add(4), dictID); if hSize.wrapping_add(dictContentSize) < dictBufferCapacity { core::ptr::copy( (dictBuffer as *mut core::ffi::c_char) @@ -1353,14 +1316,15 @@ unsafe fn ZDICT_addEntropyTablesFromBuffer_advanced( hSize.wrapping_add(dictContentSize) } } + unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( dictBuffer: *mut core::ffi::c_void, maxDictSize: size_t, samplesBuffer: *const core::ffi::c_void, - samplesSizes: *const size_t, - nbSamples: core::ffi::c_uint, + samplesSizes: &[usize], params: ZDICT_legacy_params_t, ) -> size_t { + let nbSamples = samplesSizes.len() as u32; let dictListSize = if (if 10000 > nbSamples { 10000 } else { nbSamples }) > (maxDictSize / 16) as u32 { if 10000 > nbSamples { @@ -1384,7 +1348,7 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( nbSamples >> selectivity }; let targetDictSize = maxDictSize; - let samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + let samplesBuffSize = samplesSizes.iter().sum(); let mut dictSize = 0; let notificationLevel = 
params.zParams.notificationLevel; if dictList.is_null() { @@ -1404,7 +1368,7 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( dictListSize, samplesBuffer, samplesBuffSize, - samplesSizes, + samplesSizes.as_ptr(), nbSamples, minRep, notificationLevel, @@ -1448,8 +1412,7 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( ); } ZDICT_printHex( - (samplesBuffer as *const core::ffi::c_char).offset(pos as isize) - as *const core::ffi::c_void, + samplesBuffer.byte_offset(pos as isize), printedLength as size_t, ); if notificationLevel >= 3 { @@ -1542,9 +1505,7 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( } memcpy( ptr as *mut core::ffi::c_void, - (samplesBuffer as *const core::ffi::c_char) - .offset((*dictList.offset(u_0 as isize)).pos as isize) - as *const core::ffi::c_void, + samplesBuffer.byte_offset((*dictList.offset(u_0 as isize)).pos as isize), l as size_t, ); u_0 = u_0.wrapping_add(1); @@ -1555,12 +1516,47 @@ unsafe fn ZDICT_trainFromBuffer_unsafe_legacy( maxDictSize, samplesBuffer, samplesSizes, - nbSamples, params.zParams, ); free(dictList as *mut core::ffi::c_void); dictSize } + +/// Train a dictionary from an array of samples. +/// +/// Samples must be stored concatenated in a single flat buffer `samplesBuffer`, supplied with an +/// array of sizes `samplesSizes`, providing the size of each sample, in order. +/// +/// The resulting dictionary will be saved into `dictBuffer`. +/// +/// `params` is optional and can be provided with values set to 0 to mean "default". +/// +/// In general, a reasonable dictionary has a size of ~100 KB. It's possible to select smaller or +/// larger size, just by specifying `dictBufferCapacity`. In general, it's recommended to provide a +/// few thousands samples, though this can vary a lot. It's recommended that total size of all +/// samples be about ~x100 times the target size of dictionary. 
+/// +/// # Returns +/// +/// - the size of the dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) +/// - an error code, which can be tested with [`ZDICT_isError`] +/// +/// Dictionary training will fail if there are not enough samples to construct a dictionary, or if +/// most of the samples are too small (< 8 bytes being the lower limit). If dictionary training +/// fails, you should use zstd without a dictionary, as the dictionary would've been ineffective +/// anyways. If you believe your samples would benefit from a dictionary please open an issue with +/// details, and we can look into it. +/// +/// # Safety +/// +/// Behavior is undefined if any of the following conditions are violated: +/// +/// - `dictBufferCapacity` is 0 or `dictBuffer` and `dictBufferCapacity` satisfy the requirements +/// of [`core::slice::from_raw_parts_mut`]. +/// - `nbSamples` is 0 or `samplesSizes` and `nbSamples` satisfy the requirements +/// of [`core::slice::from_raw_parts`]. +/// - `sum(samplesSizes)` is 0 or `samplesBuffer` and `sum(samplesSizes)` satisfy the requirements +/// of [`core::slice::from_raw_parts`]. 
#[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(ZDICT_trainFromBuffer_legacy))] pub unsafe extern "C" fn ZDICT_trainFromBuffer_legacy( dictBuffer: *mut core::ffi::c_void, @@ -1570,7 +1566,13 @@ pub unsafe extern "C" fn ZDICT_trainFromBuffer_legacy( nbSamples: core::ffi::c_uint, params: ZDICT_legacy_params_t, ) -> size_t { - let sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + let samplesSizes = if samplesSizes.is_null() || nbSamples == 0 { + &[] + } else { + core::slice::from_raw_parts(samplesSizes, nbSamples as usize) + }; + + let sBuffSize: size_t = samplesSizes.iter().sum(); if sBuffSize < ZDICT_MIN_SAMPLES_SIZE as size_t { return 0; } @@ -1582,7 +1584,6 @@ pub unsafe extern "C" fn ZDICT_trainFromBuffer_legacy( dictBufferCapacity, new_buf.as_ptr().cast::<core::ffi::c_void>(), samplesSizes, - nbSamples, params, ) } @@ -1656,6 +1657,12 @@ pub unsafe extern "C" fn ZDICT_addEntropyTablesFromBuffer( samplesSizes: *const size_t, nbSamples: core::ffi::c_uint, ) -> size_t { + let samplesSizes = if samplesSizes.is_null() || nbSamples == 0 { + &[] + } else { + core::slice::from_raw_parts(samplesSizes, nbSamples as usize) + }; + let params = ZDICT_params_t::default(); ZDICT_addEntropyTablesFromBuffer_advanced( dictBuffer, @@ -1663,7 +1670,6 @@ pub unsafe extern "C" fn ZDICT_addEntropyTablesFromBuffer( dictBufferCapacity, samplesBuffer, samplesSizes, - nbSamples, params, ) } diff --git a/test-libzstd-rs-sys/src/dict_builder.rs b/test-libzstd-rs-sys/src/dict_builder.rs index 238709e1..c9c287d7 100644 --- a/test-libzstd-rs-sys/src/dict_builder.rs +++ b/test-libzstd-rs-sys/src/dict_builder.rs @@ -194,6 +194,90 @@ fn test_train_from_buffer_fastcover() { }); } +#[test] +#[cfg(not(target_family = "wasm"))] +#[cfg_attr(miri, ignore = "slow")] +fn test_train_from_buffer_legacy() { + let input_data = "The quick brown fox jumps high"; + + assert_eq_rs_c!({ + let mut sample_data = Vec::new(); + + let mut sample_sizes = Vec::new(); + + // ZDICT_trainFromBuffer_legacy
needs at least 512 samples + for _ in 0..512 / SAMPLES.len() { + for &s in &SAMPLES { + sample_data.extend_from_slice(s.as_bytes()); + + sample_sizes.push(s.len()); + } + } + + let dict_capacity = 16 * 1024; + + let mut dict_buffer = vec![0u8; dict_capacity]; + + let params = ZDICT_legacy_params_t { + zParams: ZDICT_params_t { + compressionLevel: 3, + notificationLevel: 0, + dictID: 0, + }, + selectivityLevel: 0, + }; + + let dict_size = ZDICT_trainFromBuffer_legacy( + dict_buffer.as_mut_ptr() as *mut c_void, + dict_buffer.len(), + sample_data.as_ptr() as *const c_void, + sample_sizes.as_ptr(), + sample_sizes.len() as u32, + params, + ); + + assert_eq!( + ZDICT_isError(dict_size), + 0, + "Dict training failed {:?}", + CStr::from_ptr(ZDICT_getErrorName(dict_size)).to_str(), + ); + + dict_buffer.truncate(dict_size); + + println!("Dictionary size: {}", dict_size); + + let cctx = ZSTD_createCCtx(); + + assert!(!cctx.is_null()); + + let max_compressed_size = ZSTD_compressBound(input_data.len()); + + let mut compressed = vec![0u8; max_compressed_size]; + + let compressed_size = ZSTD_compress_usingDict( + cctx, + compressed.as_mut_ptr() as *mut c_void, + compressed.len(), + input_data.as_bytes().as_ptr() as *const c_void, + input_data.len(), + dict_buffer.as_ptr() as *const c_void, + dict_buffer.len(), + 3, // compression level + ); + + assert_eq!(ZSTD_isError(compressed_size), 0, "Compression failed"); + + compressed.truncate(compressed_size); + + println!("Compressed size: {}", compressed_size); + + ZSTD_freeCCtx(cctx); + + (compressed, dict_buffer) + }); +} + #[test] #[cfg(not(target_family = "wasm"))] fn test_optimize_train_from_buffer_cover_single_threaded() {