@@ -39,12 +39,12 @@ pub type Result<T> = std::result::Result<T, Error>;
 
 /// Definition of the data transformation for the network-retrieved, binencoded rules and tokenizer datasets.
 pub trait TransformDataFn:
-    for<'w> Fn(Box<dyn Read>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
+    for<'w, 'r> Fn(Box<dyn Read + 'r>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
 {
 }
 
 impl<T> TransformDataFn for T where
-    T: for<'w> Fn(Box<dyn Read>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
+    T: for<'w, 'r> Fn(Box<dyn Read + 'r>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
 {
 }
 
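Why the added `'r` lifetime matters: under the old `for<'w>` bound, `Box<dyn Read>` defaulted to `Box<dyn Read + 'static>`, so a caller could not pass a reader that borrows a local buffer. Below is a minimal sketch of a function satisfying the widened bound; the pass-through body and the local `OtherError` alias are illustrative assumptions, not part of this patch:

```rust
use std::io::{self, Read, Write};

// Stand-in for the crate's `OtherError` alias (assumed shape).
type OtherError = Box<dyn std::error::Error + Send + Sync + 'static>;

// Thanks to `for<'w, 'r>`, both trait objects may borrow locals.
fn passthrough(
    mut source: Box<dyn Read + '_>,
    mut sink: Box<dyn Write + '_>,
) -> Result<(), OtherError> {
    // A real transform would compress or re-encode here; copying is
    // enough to show that the signature matches the bound.
    io::copy(&mut source, &mut sink)?;
    Ok(())
}

fn main() -> Result<(), OtherError> {
    let input = b"binencoded payload".to_vec();
    let mut output = Vec::new();
    // `&mut input.as_slice()` is a borrowed reader; it only type-checks
    // against `Box<dyn Read + 'r>` because of the added `'r` lifetime.
    passthrough(Box::new(&mut input.as_slice()), Box::new(&mut output))?;
    assert_eq!(output, input);
    Ok(())
}
```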
@@ -155,6 +155,7 @@ fn obtain_binary_cache_or_github(
         let mut intermediate = Box::new(Cursor::new(Vec::<u8>::new()));
         transform_data_fn(Box::new(reader_binenc), Box::new(&mut intermediate))
             .map_err(Error::TransformError)?;
+        intermediate.seek(SeekFrom::Start(0_u64))?;
         intermediate
     } else {
         Box::new(reader_binenc)
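The inserted `seek` is the substantive fix in this hunk: writing through a `Cursor` leaves its position at the end of the buffer, so a later read from `intermediate` would yield zero bytes. A self-contained sketch of the failure mode (names here are illustrative):

```rust
use std::io::{Cursor, Read, Seek, SeekFrom, Write};

fn main() -> std::io::Result<()> {
    let mut intermediate = Cursor::new(Vec::<u8>::new());
    intermediate.write_all(b"transformed bytes")?;

    // Without this rewind, the cursor still points past the data it just
    // wrote, and `read_to_string` below would return an empty string.
    intermediate.seek(SeekFrom::Start(0))?;

    let mut round_trip = String::new();
    intermediate.read_to_string(&mut round_trip)?;
    assert_eq!(round_trip, "transformed bytes");
    Ok(())
}
```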
@@ -355,8 +356,8 @@ impl BinaryBuilder {
                 .open(&tokenizer_out)?,
         );
         if let Some(ref transform_data_fn) = self.transform_data_fn {
-            let mut transfer_buffer_rules = Cursor::new(Vec::new());
-            let mut transfer_buffer_tokenizer = Cursor::new(Vec::new());
+            let mut transfer_buffer_rules = Vec::new();
+            let mut transfer_buffer_tokenizer = Vec::new();
 
             compile::compile(
                 build_dir,
@@ -365,10 +366,13 @@ impl BinaryBuilder {
             )
             .map_err(Error::CollationFailed)?;
 
-            transform_data_fn(Box::new(transfer_buffer_rules), Box::new(rules_sink))
+            assert_ne!(transfer_buffer_rules.len(), 0);
+            assert_ne!(transfer_buffer_tokenizer.len(), 0);
+
+            transform_data_fn(Box::new(&mut transfer_buffer_rules.as_slice()), Box::new(rules_sink))
                 .map_err(Error::TransformError)?;
             transform_data_fn(
-                Box::new(transfer_buffer_tokenizer),
+                Box::new(&mut transfer_buffer_tokenizer.as_slice()),
                 Box::new(tokenizer_sink),
             )
             .map_err(Error::TransformError)?;
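The buffer change above avoids the rewind problem from the earlier hunk by construction: compiled output lands in plain `Vec<u8>`s (whose `Write` impl appends), the `assert_ne!` guards check that compilation actually produced bytes, and reading back goes through `&mut buffer.as_slice()`, a `&mut &[u8]` that always starts at offset zero; it is only accepted by `transform_data_fn` because of the new `'r` bound. A small std-only sketch of the pattern:

```rust
use std::io::{Read, Write};

fn main() -> std::io::Result<()> {
    // Writing into a `Vec<u8>` needs no cursor bookkeeping...
    let mut transfer_buffer: Vec<u8> = Vec::new();
    transfer_buffer.write_all(b"compiled rules")?;

    // ...and a slice reader starts at the beginning every time,
    // with no explicit seek required.
    let mut reader = transfer_buffer.as_slice();
    let mut out = Vec::new();
    reader.read_to_end(&mut out)?;
    assert_eq!(out, transfer_buffer);
    Ok(())
}
```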
@@ -652,7 +656,9 @@ mod tests {
             .join(Path::new(&tokenizer_filename("en")))
             .with_extension("bin.gz");
         assert!(tokenizer_path.exists());
-        smush::decode(&fs::read(tokenizer_path)?, smush::Codec::Gzip).unwrap();
+        let decoded = smush::decode(&fs::read(tokenizer_path)?, smush::Codec::Gzip).unwrap();
+
+        let _ = nlprule_030::Tokenizer::new_from(&mut decoded.as_slice()).unwrap();
 
         Ok(())
     }
@@ -700,6 +706,8 @@ mod tests {
         let mut decoded = Vec::new();
         decoder.read_to_end(&mut decoded).unwrap();
 
+        let _ = nlprule_030::Rules::new_from(&mut decoded.as_slice()).unwrap();
+
         Ok(())
     }
 
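For context, `decoder` is set up in lines elided from this hunk; assuming it is a gzip stream reader such as `flate2::read::GzDecoder` (an assumption, since the hunk above used `smush` for decoding instead), the compress-then-decode round trip being validated looks roughly like:

```rust
use std::io::{Read, Write};

use flate2::{read::GzDecoder, write::GzEncoder, Compression};

fn main() -> std::io::Result<()> {
    // Round-trip stand-in for the gzipped binary the test reads back.
    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(b"serialized tokenizer")?;
    let compressed = encoder.finish()?;

    // Decompress and check the payload survived unchanged.
    let mut decoded = Vec::new();
    GzDecoder::new(compressed.as_slice()).read_to_end(&mut decoded)?;
    assert_eq!(decoded, b"serialized tokenizer");
    Ok(())
}
```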
@@ -763,15 +771,9 @@ mod tests {
         let rules_path = tempdir
             .join(Path::new(&rules_filename("en")))
             .with_extension("bin");
-        assert!(rules_path.exists());
+        assert!(rules_path.is_file());
 
-        // The following will always fail since the versions will mismatch and rebuilding does not make sense
-        // `get_build_dir` is tested separately
-        //
-        // ```rust,no_run
-        // let _ = nlprule::Rules::new(rules_path)
-        //     .map_err(|e| Error::ValidationFailed("en".to_owned(), Binary::Rules, e))?;
-        // ```
+        let _ = nlprule_030::Rules::new(rules_path).unwrap();
         Ok(())
     }
 }