Skip to content

Commit aaf3685

Browse files
committed
ZDICT_analyzeEntropy: More readability cleanups
1 parent 1f7fb80 commit aaf3685

File tree

1 file changed

+47
-38
lines changed

1 file changed

+47
-38
lines changed

lib/dictBuilder/zdict.rs

Lines changed: 47 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -854,7 +854,7 @@ unsafe fn ZDICT_analyzeEntropy(
854854
};
855855

856856
let eSize = analyze_entropy_internal(
857-
dstBuffer,
857+
dstBuffer as *mut u8,
858858
maxDstSize,
859859
compressionLevel,
860860
srcBuffer,
@@ -873,7 +873,7 @@ unsafe fn ZDICT_analyzeEntropy(
873873
}
874874

875875
unsafe fn analyze_entropy_internal(
876-
dstBuffer: *mut core::ffi::c_void,
876+
mut dstPtr: *mut u8,
877877
mut maxDstSize: size_t,
878878
mut compressionLevel: core::ffi::c_int,
879879
srcBuffer: *const core::ffi::c_void,
@@ -884,27 +884,17 @@ unsafe fn analyze_entropy_internal(
884884
esr: &mut EStats_ress_t,
885885
) -> Result<size_t, Error> {
886886
let mut hufTable: [HUF_CElt; 257] = [0; 257];
887-
let mut offcodeNCount: [core::ffi::c_short; 31] = [0; 31];
888-
let offcodeMax =
889-
ZSTD_highbit32(dictBufferSize.wrapping_add((128 * ((1) << 10)) as size_t) as u32);
890-
let mut matchLengthNCount: [core::ffi::c_short; 53] = [0; 53];
891-
let mut litLengthNCount: [core::ffi::c_short; 36] = [0; 36];
892-
let mut params = ZSTD_parameters::default();
893-
let mut huffLog = 11;
894-
let mut offLog = OffFSELog;
895-
let mut mlLog = MLFSELog;
896-
let mut llLog = LLFSELog;
897-
let mut errorCode: size_t = 0;
898-
let averageSampleSize = if fileSizes.is_empty() {
899-
0
900-
} else {
901-
fileSizes.iter().sum::<usize>() / fileSizes.len()
902-
};
903-
let mut dstPtr = dstBuffer as *mut u8;
904-
let mut wksp: [u32; 1216] = [0; 1216];
887+
888+
const KB: usize = 1 << 10;
889+
let offcodeMax = ZSTD_highbit32(dictBufferSize.wrapping_add(128 * KB) as u32);
905890
if offcodeMax > OFFCODE_MAX {
906891
return Err(Error::dictionaryCreation_failed);
907892
}
893+
894+
let mut offcodeNCount = [0i16; (OFFCODE_MAX + 1) as usize];
895+
let mut matchLengthNCount = [0i16; (MaxML + 1) as usize];
896+
let mut litLengthNCount = [0i16; (MaxLL + 1) as usize];
897+
908898
let mut countLit = [1u32; 256];
909899
let mut offcodeCount = [1u32; (OFFCODE_MAX + 1) as usize];
910900
let mut matchLengthCount = [1u32; (MaxML + 1) as usize];
@@ -917,10 +907,15 @@ unsafe fn analyze_entropy_internal(
917907

918908
let mut bestRepOffset = [offsetCount_t::default(); ZSTD_REP_NUM as usize + 1];
919909

910+
let averageSampleSize = if fileSizes.is_empty() {
911+
0
912+
} else {
913+
fileSizes.iter().sum::<usize>() / fileSizes.len()
914+
};
920915
if compressionLevel == 0 {
921916
compressionLevel = ZSTD_CLEVEL_DEFAULT;
922917
}
923-
params = ZSTD_getParams(
918+
let params = ZSTD_getParams(
924919
compressionLevel,
925920
averageSampleSize as core::ffi::c_ulonglong,
926921
dictBufferSize,
@@ -941,6 +936,8 @@ unsafe fn analyze_entropy_internal(
941936
}
942937
return Err(Error::memory_allocation);
943938
}
939+
940+
// collect stats on all samples
944941
let mut pos = 0usize;
945942
for fileSize in fileSizes {
946943
ZDICT_countEStats(
@@ -963,6 +960,10 @@ unsafe fn analyze_entropy_internal(
963960
eprintln!("{:>2} :{:>7} ", i, count);
964961
}
965962
}
963+
964+
// analyze, build stats, starting with literals
965+
let mut wksp: [u32; 1216] = [0; 1216];
966+
let huffLog = 11;
966967
let mut maxNbBits = HUF_buildCTable_wksp(
967968
hufTable.as_mut_ptr(),
968969
countLit.as_mut_ptr(),
@@ -979,9 +980,7 @@ unsafe fn analyze_entropy_internal(
979980
}
980981
if maxNbBits == 8 {
981982
if notificationLevel >= 2 {
982-
eprintln!(
983-
"warning : pathological dataset : literals are not compressible : samples are noisy or too regular "
984-
);
983+
eprintln!("warning : pathological dataset : literals are not compressible : samples are noisy or too regular ");
985984
}
986985
ZDICT_flatLit(&mut countLit);
987986
maxNbBits = HUF_buildCTable_wksp(
@@ -993,17 +992,17 @@ unsafe fn analyze_entropy_internal(
993992
::core::mem::size_of::<[u32; 1216]>(),
994993
);
995994
}
996-
huffLog = maxNbBits as u32;
997-
let mut offset: u32 = 0;
998-
offset = 1;
999-
while offset < MAXREPOFFSET {
995+
let huffLog = maxNbBits as u32;
996+
997+
// look for most common first offsets
998+
for offset in 1..MAXREPOFFSET {
1000999
ZDICT_insertSortCount(&mut bestRepOffset, offset, repOffset[offset as usize]);
1001-
offset = offset.wrapping_add(1);
10021000
}
1001+
10031002
let total: u32 = offcodeCount[..offcodeMax as usize + 1].iter().sum();
1004-
errorCode = FSE_normalizeCount(
1003+
let errorCode = FSE_normalizeCount(
10051004
offcodeNCount.as_mut_ptr(),
1006-
offLog,
1005+
OffFSELog,
10071006
offcodeCount.as_mut_ptr(),
10081007
total as size_t,
10091008
offcodeMax,
@@ -1015,11 +1014,12 @@ unsafe fn analyze_entropy_internal(
10151014
}
10161015
return Err(err);
10171016
}
1018-
offLog = errorCode as u32;
1017+
let offLog = errorCode as u32;
1018+
10191019
let total: u32 = matchLengthCount.iter().sum();
1020-
errorCode = FSE_normalizeCount(
1020+
let errorCode = FSE_normalizeCount(
10211021
matchLengthNCount.as_mut_ptr(),
1022-
mlLog,
1022+
MLFSELog,
10231023
matchLengthCount.as_mut_ptr(),
10241024
total as size_t,
10251025
MaxML,
@@ -1031,11 +1031,12 @@ unsafe fn analyze_entropy_internal(
10311031
}
10321032
return Err(err);
10331033
}
1034-
mlLog = errorCode as u32;
1034+
let mlLog = errorCode as u32;
1035+
10351036
let total: u32 = litLengthCount.iter().sum();
1036-
errorCode = FSE_normalizeCount(
1037+
let errorCode = FSE_normalizeCount(
10371038
litLengthNCount.as_mut_ptr(),
1038-
llLog,
1039+
LLFSELog,
10391040
litLengthCount.as_mut_ptr(),
10401041
total as size_t,
10411042
MaxLL,
@@ -1047,7 +1048,9 @@ unsafe fn analyze_entropy_internal(
10471048
}
10481049
return Err(err);
10491050
}
1050-
llLog = errorCode as u32;
1051+
let llLog = errorCode as u32;
1052+
1053+
// write result to buffer
10511054
let hhSize = HUF_writeCTable_wksp(
10521055
dstPtr as *mut core::ffi::c_void,
10531056
maxDstSize,
@@ -1066,6 +1069,7 @@ unsafe fn analyze_entropy_internal(
10661069
dstPtr = dstPtr.add(hhSize);
10671070
maxDstSize = maxDstSize.wrapping_sub(hhSize);
10681071
let mut eSize = hhSize;
1072+
10691073
let ohSize = FSE_writeNCount(
10701074
dstPtr as *mut core::ffi::c_void,
10711075
maxDstSize,
@@ -1082,6 +1086,7 @@ unsafe fn analyze_entropy_internal(
10821086
dstPtr = dstPtr.add(ohSize);
10831087
maxDstSize = maxDstSize.wrapping_sub(ohSize);
10841088
eSize = eSize.wrapping_add(ohSize);
1089+
10851090
let mhSize = FSE_writeNCount(
10861091
dstPtr as *mut core::ffi::c_void,
10871092
maxDstSize,
@@ -1098,6 +1103,7 @@ unsafe fn analyze_entropy_internal(
10981103
dstPtr = dstPtr.add(mhSize);
10991104
maxDstSize = maxDstSize.wrapping_sub(mhSize);
11001105
eSize = eSize.wrapping_add(mhSize);
1106+
11011107
let lhSize = FSE_writeNCount(
11021108
dstPtr as *mut core::ffi::c_void,
11031109
maxDstSize,
@@ -1114,12 +1120,14 @@ unsafe fn analyze_entropy_internal(
11141120
dstPtr = dstPtr.add(lhSize);
11151121
maxDstSize = maxDstSize.wrapping_sub(lhSize);
11161122
eSize = eSize.wrapping_add(lhSize);
1123+
11171124
if maxDstSize < 12 {
11181125
if notificationLevel >= 1 {
11191126
eprintln!("not enough space to write RepOffsets ");
11201127
}
11211128
return Err(Error::dstSize_tooSmall);
11221129
}
1130+
11231131
MEM_writeLE32(dstPtr as *mut core::ffi::c_void, *repStartValue.as_ptr());
11241132
MEM_writeLE32(
11251133
dstPtr.add(4) as *mut core::ffi::c_void,
@@ -1129,6 +1137,7 @@ unsafe fn analyze_entropy_internal(
11291137
dstPtr.add(8) as *mut core::ffi::c_void,
11301138
*repStartValue.as_ptr().add(2),
11311139
);
1140+
11321141
Ok(eSize.wrapping_add(12))
11331142
}
11341143

0 commit comments

Comments
 (0)