@@ -854,7 +854,7 @@ unsafe fn ZDICT_analyzeEntropy(
854854 } ;
855855
856856 let eSize = analyze_entropy_internal (
857- dstBuffer,
857+ dstBuffer as * mut u8 ,
858858 maxDstSize,
859859 compressionLevel,
860860 srcBuffer,
@@ -873,7 +873,7 @@ unsafe fn ZDICT_analyzeEntropy(
873873}
874874
875875unsafe fn analyze_entropy_internal (
876- dstBuffer : * mut core :: ffi :: c_void ,
876+ mut dstPtr : * mut u8 ,
877877 mut maxDstSize : size_t ,
878878 mut compressionLevel : core:: ffi:: c_int ,
879879 srcBuffer : * const core:: ffi:: c_void ,
@@ -884,27 +884,17 @@ unsafe fn analyze_entropy_internal(
884884 esr : & mut EStats_ress_t ,
885885) -> Result < size_t , Error > {
886886 let mut hufTable: [ HUF_CElt ; 257 ] = [ 0 ; 257 ] ;
887- let mut offcodeNCount: [ core:: ffi:: c_short ; 31 ] = [ 0 ; 31 ] ;
888- let offcodeMax =
889- ZSTD_highbit32 ( dictBufferSize. wrapping_add ( ( 128 * ( ( 1 ) << 10 ) ) as size_t ) as u32 ) ;
890- let mut matchLengthNCount: [ core:: ffi:: c_short ; 53 ] = [ 0 ; 53 ] ;
891- let mut litLengthNCount: [ core:: ffi:: c_short ; 36 ] = [ 0 ; 36 ] ;
892- let mut params = ZSTD_parameters :: default ( ) ;
893- let mut huffLog = 11 ;
894- let mut offLog = OffFSELog ;
895- let mut mlLog = MLFSELog ;
896- let mut llLog = LLFSELog ;
897- let mut errorCode: size_t = 0 ;
898- let averageSampleSize = if fileSizes. is_empty ( ) {
899- 0
900- } else {
901- fileSizes. iter ( ) . sum :: < usize > ( ) / fileSizes. len ( )
902- } ;
903- let mut dstPtr = dstBuffer as * mut u8 ;
904- let mut wksp: [ u32 ; 1216 ] = [ 0 ; 1216 ] ;
887+
888+ const KB : usize = 1 << 10 ;
889+ let offcodeMax = ZSTD_highbit32 ( dictBufferSize. wrapping_add ( 128 * KB ) as u32 ) ;
905890 if offcodeMax > OFFCODE_MAX {
906891 return Err ( Error :: dictionaryCreation_failed) ;
907892 }
893+
894+ let mut offcodeNCount = [ 0i16 ; ( OFFCODE_MAX + 1 ) as usize ] ;
895+ let mut matchLengthNCount = [ 0i16 ; ( MaxML + 1 ) as usize ] ;
896+ let mut litLengthNCount = [ 0i16 ; ( MaxLL + 1 ) as usize ] ;
897+
908898 let mut countLit = [ 1u32 ; 256 ] ;
909899 let mut offcodeCount = [ 1u32 ; ( OFFCODE_MAX + 1 ) as usize ] ;
910900 let mut matchLengthCount = [ 1u32 ; ( MaxML + 1 ) as usize ] ;
@@ -917,10 +907,15 @@ unsafe fn analyze_entropy_internal(
917907
918908 let mut bestRepOffset = [ offsetCount_t:: default ( ) ; ZSTD_REP_NUM as usize + 1 ] ;
919909
910+ let averageSampleSize = if fileSizes. is_empty ( ) {
911+ 0
912+ } else {
913+ fileSizes. iter ( ) . sum :: < usize > ( ) / fileSizes. len ( )
914+ } ;
920915 if compressionLevel == 0 {
921916 compressionLevel = ZSTD_CLEVEL_DEFAULT ;
922917 }
923- params = ZSTD_getParams (
918+ let params = ZSTD_getParams (
924919 compressionLevel,
925920 averageSampleSize as core:: ffi:: c_ulonglong ,
926921 dictBufferSize,
@@ -941,6 +936,8 @@ unsafe fn analyze_entropy_internal(
941936 }
942937 return Err ( Error :: memory_allocation) ;
943938 }
939+
940+ // collect stats on all samples
944941 let mut pos = 0usize ;
945942 for fileSize in fileSizes {
946943 ZDICT_countEStats (
@@ -963,6 +960,10 @@ unsafe fn analyze_entropy_internal(
963960 eprintln ! ( "{:>2} :{:>7} " , i, count) ;
964961 }
965962 }
963+
964+ // analyze, build stats, starting with literals
965+ let mut wksp: [ u32 ; 1216 ] = [ 0 ; 1216 ] ;
966+ let huffLog = 11 ;
966967 let mut maxNbBits = HUF_buildCTable_wksp (
967968 hufTable. as_mut_ptr ( ) ,
968969 countLit. as_mut_ptr ( ) ,
@@ -979,9 +980,7 @@ unsafe fn analyze_entropy_internal(
979980 }
980981 if maxNbBits == 8 {
981982 if notificationLevel >= 2 {
982- eprintln ! (
983- "warning : pathological dataset : literals are not compressible : samples are noisy or too regular "
984- ) ;
983+ eprintln ! ( "warning : pathological dataset : literals are not compressible : samples are noisy or too regular " ) ;
985984 }
986985 ZDICT_flatLit ( & mut countLit) ;
987986 maxNbBits = HUF_buildCTable_wksp (
@@ -993,17 +992,17 @@ unsafe fn analyze_entropy_internal(
993992 :: core:: mem:: size_of :: < [ u32 ; 1216 ] > ( ) ,
994993 ) ;
995994 }
996- huffLog = maxNbBits as u32 ;
997- let mut offset : u32 = 0 ;
998- offset = 1 ;
999- while offset < MAXREPOFFSET {
995+ let huffLog = maxNbBits as u32 ;
996+
997+ // look for most common first offsets
998+ for offset in 1 .. MAXREPOFFSET {
1000999 ZDICT_insertSortCount ( & mut bestRepOffset, offset, repOffset[ offset as usize ] ) ;
1001- offset = offset. wrapping_add ( 1 ) ;
10021000 }
1001+
10031002 let total: u32 = offcodeCount[ ..offcodeMax as usize + 1 ] . iter ( ) . sum ( ) ;
1004- errorCode = FSE_normalizeCount (
1003+ let errorCode = FSE_normalizeCount (
10051004 offcodeNCount. as_mut_ptr ( ) ,
1006- offLog ,
1005+ OffFSELog ,
10071006 offcodeCount. as_mut_ptr ( ) ,
10081007 total as size_t ,
10091008 offcodeMax,
@@ -1015,11 +1014,12 @@ unsafe fn analyze_entropy_internal(
10151014 }
10161015 return Err ( err) ;
10171016 }
1018- offLog = errorCode as u32 ;
1017+ let offLog = errorCode as u32 ;
1018+
10191019 let total: u32 = matchLengthCount. iter ( ) . sum ( ) ;
1020- errorCode = FSE_normalizeCount (
1020+ let errorCode = FSE_normalizeCount (
10211021 matchLengthNCount. as_mut_ptr ( ) ,
1022- mlLog ,
1022+ MLFSELog ,
10231023 matchLengthCount. as_mut_ptr ( ) ,
10241024 total as size_t ,
10251025 MaxML ,
@@ -1031,11 +1031,12 @@ unsafe fn analyze_entropy_internal(
10311031 }
10321032 return Err ( err) ;
10331033 }
1034- mlLog = errorCode as u32 ;
1034+ let mlLog = errorCode as u32 ;
1035+
10351036 let total: u32 = litLengthCount. iter ( ) . sum ( ) ;
1036- errorCode = FSE_normalizeCount (
1037+ let errorCode = FSE_normalizeCount (
10371038 litLengthNCount. as_mut_ptr ( ) ,
1038- llLog ,
1039+ LLFSELog ,
10391040 litLengthCount. as_mut_ptr ( ) ,
10401041 total as size_t ,
10411042 MaxLL ,
@@ -1047,7 +1048,9 @@ unsafe fn analyze_entropy_internal(
10471048 }
10481049 return Err ( err) ;
10491050 }
1050- llLog = errorCode as u32 ;
1051+ let llLog = errorCode as u32 ;
1052+
1053+ // write result to buffer
10511054 let hhSize = HUF_writeCTable_wksp (
10521055 dstPtr as * mut core:: ffi:: c_void ,
10531056 maxDstSize,
@@ -1066,6 +1069,7 @@ unsafe fn analyze_entropy_internal(
10661069 dstPtr = dstPtr. add ( hhSize) ;
10671070 maxDstSize = maxDstSize. wrapping_sub ( hhSize) ;
10681071 let mut eSize = hhSize;
1072+
10691073 let ohSize = FSE_writeNCount (
10701074 dstPtr as * mut core:: ffi:: c_void ,
10711075 maxDstSize,
@@ -1082,6 +1086,7 @@ unsafe fn analyze_entropy_internal(
10821086 dstPtr = dstPtr. add ( ohSize) ;
10831087 maxDstSize = maxDstSize. wrapping_sub ( ohSize) ;
10841088 eSize = eSize. wrapping_add ( ohSize) ;
1089+
10851090 let mhSize = FSE_writeNCount (
10861091 dstPtr as * mut core:: ffi:: c_void ,
10871092 maxDstSize,
@@ -1098,6 +1103,7 @@ unsafe fn analyze_entropy_internal(
10981103 dstPtr = dstPtr. add ( mhSize) ;
10991104 maxDstSize = maxDstSize. wrapping_sub ( mhSize) ;
11001105 eSize = eSize. wrapping_add ( mhSize) ;
1106+
11011107 let lhSize = FSE_writeNCount (
11021108 dstPtr as * mut core:: ffi:: c_void ,
11031109 maxDstSize,
@@ -1114,12 +1120,14 @@ unsafe fn analyze_entropy_internal(
11141120 dstPtr = dstPtr. add ( lhSize) ;
11151121 maxDstSize = maxDstSize. wrapping_sub ( lhSize) ;
11161122 eSize = eSize. wrapping_add ( lhSize) ;
1123+
11171124 if maxDstSize < 12 {
11181125 if notificationLevel >= 1 {
11191126 eprintln ! ( "not enough space to write RepOffsets " ) ;
11201127 }
11211128 return Err ( Error :: dstSize_tooSmall) ;
11221129 }
1130+
11231131 MEM_writeLE32 ( dstPtr as * mut core:: ffi:: c_void , * repStartValue. as_ptr ( ) ) ;
11241132 MEM_writeLE32 (
11251133 dstPtr. add ( 4 ) as * mut core:: ffi:: c_void ,
@@ -1129,6 +1137,7 @@ unsafe fn analyze_entropy_internal(
11291137 dstPtr. add ( 8 ) as * mut core:: ffi:: c_void ,
11301138 * repStartValue. as_ptr ( ) . add ( 2 ) ,
11311139 ) ;
1140+
11321141 Ok ( eSize. wrapping_add ( 12 ) )
11331142}
11341143
0 commit comments