@@ -593,111 +593,104 @@ unsafe fn ZDICT_trainBuffer_legacy(
593593 mut minRatio : core:: ffi:: c_uint ,
594594 notificationLevel : u32 ,
595595) -> size_t {
596- let filePos =
597- malloc ( ( nbFiles as size_t ) . wrapping_mul ( :: core:: mem:: size_of :: < u32 > ( ) ) ) as * mut u32 ;
598- let mut result = 0 ;
599596 let mut displayClock = Instant :: now ( ) ;
600597 let refresh_rate = Duration :: from_millis ( 300 ) ;
601598
602599 // init
603600 if notificationLevel >= 2 {
604601 eprintln ! ( "\r {:70 }\r " , "" ) ; // clean display line
605602 }
606- if filePos. is_null ( ) {
607- result = Error :: memory_allocation. to_error_code ( ) ;
608- } else {
609- if minRatio < MINRATIO {
610- minRatio = MINRATIO ;
611- }
612603
613- // limit sample set size (divsufsort limitation)
614- if bufferSize > ZDICT_MAX_SAMPLES_SIZE && notificationLevel >= 3 {
615- eprintln ! (
616- "sample set too large : reduced to {} MB ..." ,
617- ( 2000 ) << 20 >> 20 ,
618- ) ;
619- }
620- while bufferSize > ZDICT_MAX_SAMPLES_SIZE {
621- nbFiles = nbFiles. wrapping_sub ( 1 ) ;
622- bufferSize = bufferSize. wrapping_sub ( * fileSizes. offset ( nbFiles as isize ) ) ;
623- }
604+ if minRatio < MINRATIO {
605+ minRatio = MINRATIO ;
606+ }
624607
625- // sort
626- if notificationLevel >= 2 {
627- eprintln ! (
628- "sorting {} files of total size {} MB ..." ,
629- nbFiles,
630- bufferSize >> 20 ,
631- ) ;
632- }
633- let mut suffix = vec ! [ 0u32 ; bufferSize] ;
634- let divSuftSortResult = divsufsort (
635- core:: slice:: from_raw_parts ( buffer as * const u8 , bufferSize) ,
636- std:: mem:: transmute :: < & mut [ u32 ] , & mut [ i32 ] > ( & mut suffix[ ..] ) ,
637- false ,
608+ // limit sample set size (divsufsort limitation)
609+ if bufferSize > ZDICT_MAX_SAMPLES_SIZE && notificationLevel >= 3 {
610+ eprintln ! (
611+ "sample set too large : reduced to {} MB ..." ,
612+ ( 2000 ) << 20 >> 20 ,
638613 ) ;
639- if divSuftSortResult != 0 {
640- result = Error :: GENERIC . to_error_code ( ) ;
641- } else {
642- // build reverse suffix sort
643- let mut reverseSuffix = vec ! [ 0u32 ; bufferSize] ;
644- for pos in 0 ..bufferSize {
645- reverseSuffix[ suffix[ pos] as usize ] = pos as u32 ;
646- }
614+ }
615+ while bufferSize > ZDICT_MAX_SAMPLES_SIZE {
616+ nbFiles = nbFiles. wrapping_sub ( 1 ) ;
617+ bufferSize = bufferSize. wrapping_sub ( * fileSizes. offset ( nbFiles as isize ) ) ;
618+ }
647619
648- // Note: filePos tracks borders between samples.
649- // It's not used at this stage, but planned to become useful in a later update
650- * filePos = 0 ;
651- for pos in 1 ..nbFiles as size_t {
652- * filePos. add ( pos) = ( * filePos. add ( pos. wrapping_sub ( 1 ) ) as size_t )
653- . wrapping_add ( * fileSizes. add ( pos. wrapping_sub ( 1 ) ) )
654- as u32 ;
655- }
620+ // sort
621+ if notificationLevel >= 2 {
622+ eprintln ! (
623+ "sorting {} files of total size {} MB ..." ,
624+ nbFiles,
625+ bufferSize >> 20 ,
626+ ) ;
627+ }
628+ let mut suffix = vec ! [ 0u32 ; bufferSize] ;
629+ let divSuftSortResult = divsufsort (
630+ core:: slice:: from_raw_parts ( buffer as * const u8 , bufferSize) ,
631+ std:: mem:: transmute :: < & mut [ u32 ] , & mut [ i32 ] > ( & mut suffix) ,
632+ false ,
633+ ) ;
634+ if divSuftSortResult != 0 {
635+ return Error :: GENERIC . to_error_code ( ) ;
636+ }
656637
657- if notificationLevel >= 2 {
658- eprintln ! ( "finding patterns ..." ) ;
659- }
660- if notificationLevel >= 3 {
661- eprintln ! ( "minimum ratio : {} " , minRatio) ;
662- }
638+ // build reverse suffix sort
639+ let mut reverseSuffix = vec ! [ 0u32 ; bufferSize] ;
640+ for pos in 0 ..bufferSize {
641+ reverseSuffix[ suffix[ pos] as usize ] = pos as u32 ;
642+ }
663643
664- let mut doneMarks = vec ! [ 0u8 ; bufferSize + 16 ] ;
665- let mut cursor = 0usize ;
666- while cursor < bufferSize {
667- if doneMarks[ cursor] != 0 {
668- cursor += 1 ;
669- continue ;
670- }
644+ // Note: filePos tracks borders between samples.
645+ // It's not used at this stage, but planned to become useful in a later update
646+ let mut filePos = vec ! [ 0u32 ; nbFiles as usize ] ;
647+ // filePos[0] is intentionally left 0
648+ for pos in 1 ..nbFiles as size_t {
649+ filePos[ pos] =
650+ ( filePos[ pos - 1 ] as size_t ) . wrapping_add ( * fileSizes. add ( pos. wrapping_sub ( 1 ) ) ) as u32 ;
651+ }
671652
672- let solution = ZDICT_analyzePos (
673- & mut doneMarks,
674- & suffix,
675- reverseSuffix[ cursor] ,
676- buffer,
677- minRatio,
678- notificationLevel,
679- ) ;
680- if solution. length == 0 {
681- cursor += 1 ;
682- continue ;
683- }
653+ if notificationLevel >= 2 {
654+ eprintln ! ( "finding patterns ..." ) ;
655+ }
656+ if notificationLevel >= 3 {
657+ eprintln ! ( "minimum ratio : {} " , minRatio) ;
658+ }
684659
685- ZDICT_insertDictItem ( dictList, dictListSize, solution, buffer) ;
686- cursor += solution. length as usize ;
660+ let mut doneMarks = vec ! [ 0u8 ; bufferSize + 16 ] ;
661+ let mut cursor = 0usize ;
662+ while cursor < bufferSize {
663+ if doneMarks[ cursor] != 0 {
664+ cursor += 1 ;
665+ continue ;
666+ }
687667
688- if notificationLevel >= 2 && displayClock. elapsed ( ) > refresh_rate {
689- displayClock = Instant :: now ( ) ;
690- eprint ! (
691- "\r {:4.2} % \r " ,
692- cursor as core:: ffi:: c_double / bufferSize as core:: ffi:: c_double
693- * 100.0f64 ,
694- ) ;
695- }
696- }
668+ let solution = ZDICT_analyzePos (
669+ & mut doneMarks,
670+ & suffix,
671+ reverseSuffix[ cursor] ,
672+ buffer,
673+ minRatio,
674+ notificationLevel,
675+ ) ;
676+ if solution. length == 0 {
677+ cursor += 1 ;
678+ continue ;
679+ }
680+
681+ ZDICT_insertDictItem ( dictList, dictListSize, solution, buffer) ;
682+ cursor += solution. length as usize ;
683+
684+ if notificationLevel >= 2 && displayClock. elapsed ( ) > refresh_rate {
685+ displayClock = Instant :: now ( ) ;
686+ eprint ! (
687+ "\r {:4.2} % \r " ,
688+ cursor as core:: ffi:: c_double / bufferSize as core:: ffi:: c_double * 100.0f64 ,
689+ ) ;
697690 }
698691 }
699- free ( filePos as * mut core :: ffi :: c_void ) ;
700- result
692+
693+ 0
701694}
702695
703696fn fill_noise ( buffer : & mut [ u8 ] ) {
0 commit comments