Skip to content

Commit b8225cf

Browse files
michielp1807 authored and folkertdev committed
ZDICT_trainBuffer_legacy: make filePos a vec
1 parent 2739a90 commit b8225cf

File tree

1 file changed

+80
-87
lines changed

1 file changed

+80
-87
lines changed

lib/dictBuilder/zdict.rs

Lines changed: 80 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -593,111 +593,104 @@ unsafe fn ZDICT_trainBuffer_legacy(
593593
mut minRatio: core::ffi::c_uint,
594594
notificationLevel: u32,
595595
) -> size_t {
596-
let filePos =
597-
malloc((nbFiles as size_t).wrapping_mul(::core::mem::size_of::<u32>())) as *mut u32;
598-
let mut result = 0;
599596
let mut displayClock = Instant::now();
600597
let refresh_rate = Duration::from_millis(300);
601598

602599
// init
603600
if notificationLevel >= 2 {
604601
eprintln!("\r{:70 }\r", ""); // clean display line
605602
}
606-
if filePos.is_null() {
607-
result = Error::memory_allocation.to_error_code();
608-
} else {
609-
if minRatio < MINRATIO {
610-
minRatio = MINRATIO;
611-
}
612603

613-
// limit sample set size (divsufsort limitation)
614-
if bufferSize > ZDICT_MAX_SAMPLES_SIZE && notificationLevel >= 3 {
615-
eprintln!(
616-
"sample set too large : reduced to {} MB ...",
617-
(2000) << 20 >> 20,
618-
);
619-
}
620-
while bufferSize > ZDICT_MAX_SAMPLES_SIZE {
621-
nbFiles = nbFiles.wrapping_sub(1);
622-
bufferSize = bufferSize.wrapping_sub(*fileSizes.offset(nbFiles as isize));
623-
}
604+
if minRatio < MINRATIO {
605+
minRatio = MINRATIO;
606+
}
624607

625-
// sort
626-
if notificationLevel >= 2 {
627-
eprintln!(
628-
"sorting {} files of total size {} MB ...",
629-
nbFiles,
630-
bufferSize >> 20,
631-
);
632-
}
633-
let mut suffix = vec![0u32; bufferSize];
634-
let divSuftSortResult = divsufsort(
635-
core::slice::from_raw_parts(buffer as *const u8, bufferSize),
636-
std::mem::transmute::<&mut [u32], &mut [i32]>(&mut suffix[..]),
637-
false,
608+
// limit sample set size (divsufsort limitation)
609+
if bufferSize > ZDICT_MAX_SAMPLES_SIZE && notificationLevel >= 3 {
610+
eprintln!(
611+
"sample set too large : reduced to {} MB ...",
612+
(2000) << 20 >> 20,
638613
);
639-
if divSuftSortResult != 0 {
640-
result = Error::GENERIC.to_error_code();
641-
} else {
642-
// build reverse suffix sort
643-
let mut reverseSuffix = vec![0u32; bufferSize];
644-
for pos in 0..bufferSize {
645-
reverseSuffix[suffix[pos] as usize] = pos as u32;
646-
}
614+
}
615+
while bufferSize > ZDICT_MAX_SAMPLES_SIZE {
616+
nbFiles = nbFiles.wrapping_sub(1);
617+
bufferSize = bufferSize.wrapping_sub(*fileSizes.offset(nbFiles as isize));
618+
}
647619

648-
// Note: filePos tracks borders between samples.
649-
// It's not used at this stage, but planned to become useful in a later update
650-
*filePos = 0;
651-
for pos in 1..nbFiles as size_t {
652-
*filePos.add(pos) = (*filePos.add(pos.wrapping_sub(1)) as size_t)
653-
.wrapping_add(*fileSizes.add(pos.wrapping_sub(1)))
654-
as u32;
655-
}
620+
// sort
621+
if notificationLevel >= 2 {
622+
eprintln!(
623+
"sorting {} files of total size {} MB ...",
624+
nbFiles,
625+
bufferSize >> 20,
626+
);
627+
}
628+
let mut suffix = vec![0u32; bufferSize];
629+
let divSuftSortResult = divsufsort(
630+
core::slice::from_raw_parts(buffer as *const u8, bufferSize),
631+
std::mem::transmute::<&mut [u32], &mut [i32]>(&mut suffix),
632+
false,
633+
);
634+
if divSuftSortResult != 0 {
635+
return Error::GENERIC.to_error_code();
636+
}
656637

657-
if notificationLevel >= 2 {
658-
eprintln!("finding patterns ...");
659-
}
660-
if notificationLevel >= 3 {
661-
eprintln!("minimum ratio : {} ", minRatio);
662-
}
638+
// build reverse suffix sort
639+
let mut reverseSuffix = vec![0u32; bufferSize];
640+
for pos in 0..bufferSize {
641+
reverseSuffix[suffix[pos] as usize] = pos as u32;
642+
}
663643

664-
let mut doneMarks = vec![0u8; bufferSize + 16];
665-
let mut cursor = 0usize;
666-
while cursor < bufferSize {
667-
if doneMarks[cursor] != 0 {
668-
cursor += 1;
669-
continue;
670-
}
644+
// Note: filePos tracks borders between samples.
645+
// It's not used at this stage, but planned to become useful in a later update
646+
let mut filePos = vec![0u32; nbFiles as usize];
647+
// filePos[0] is intentionally left 0
648+
for pos in 1..nbFiles as size_t {
649+
filePos[pos] =
650+
(filePos[pos - 1] as size_t).wrapping_add(*fileSizes.add(pos.wrapping_sub(1))) as u32;
651+
}
671652

672-
let solution = ZDICT_analyzePos(
673-
&mut doneMarks,
674-
&suffix,
675-
reverseSuffix[cursor],
676-
buffer,
677-
minRatio,
678-
notificationLevel,
679-
);
680-
if solution.length == 0 {
681-
cursor += 1;
682-
continue;
683-
}
653+
if notificationLevel >= 2 {
654+
eprintln!("finding patterns ...");
655+
}
656+
if notificationLevel >= 3 {
657+
eprintln!("minimum ratio : {} ", minRatio);
658+
}
684659

685-
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
686-
cursor += solution.length as usize;
660+
let mut doneMarks = vec![0u8; bufferSize + 16];
661+
let mut cursor = 0usize;
662+
while cursor < bufferSize {
663+
if doneMarks[cursor] != 0 {
664+
cursor += 1;
665+
continue;
666+
}
687667

688-
if notificationLevel >= 2 && displayClock.elapsed() > refresh_rate {
689-
displayClock = Instant::now();
690-
eprint!(
691-
"\r{:4.2} % \r",
692-
cursor as core::ffi::c_double / bufferSize as core::ffi::c_double
693-
* 100.0f64,
694-
);
695-
}
696-
}
668+
let solution = ZDICT_analyzePos(
669+
&mut doneMarks,
670+
&suffix,
671+
reverseSuffix[cursor],
672+
buffer,
673+
minRatio,
674+
notificationLevel,
675+
);
676+
if solution.length == 0 {
677+
cursor += 1;
678+
continue;
679+
}
680+
681+
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
682+
cursor += solution.length as usize;
683+
684+
if notificationLevel >= 2 && displayClock.elapsed() > refresh_rate {
685+
displayClock = Instant::now();
686+
eprint!(
687+
"\r{:4.2} % \r",
688+
cursor as core::ffi::c_double / bufferSize as core::ffi::c_double * 100.0f64,
689+
);
697690
}
698691
}
699-
free(filePos as *mut core::ffi::c_void);
700-
result
692+
693+
0
701694
}
702695

703696
fn fill_noise(buffer: &mut [u8]) {

0 commit comments

Comments
 (0)