Skip to content

Commit 28c9358

Browse files
authored
nlprule-build: make sure cursor is not seek'd all the way to the end (#39)
* make sure cursor is not at the end already. Since the transform data is short lived, the input data can be bound by 'r as well. * add another missing seek back to 0 * ensure test data is decoded successfully
1 parent aa4fa35 commit 28c9358

File tree

2 files changed

+18
-15
lines changed

2 files changed

+18
-15
lines changed

build/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ fs-err = "2.5"
2323
tempdir = "0.3"
2424
smush = "0.1.5"
2525
env_logger = "0.8"
26+
nlprule_030 = { package = "nlprule", version = "0.3.0" }

build/src/lib.rs

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,12 @@ pub type Result<T> = std::result::Result<T, Error>;
3939

4040
/// Definition of the data transformation for the network retrieved, binencoded rules and tokenizer datasets.
4141
pub trait TransformDataFn:
42-
for<'w> Fn(Box<dyn Read>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
42+
for<'w, 'r> Fn(Box<dyn Read + 'r>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
4343
{
4444
}
4545

4646
impl<T> TransformDataFn for T where
47-
T: for<'w> Fn(Box<dyn Read>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
47+
T: for<'w, 'r> Fn(Box<dyn Read + 'r>, Box<dyn Write + 'w>) -> result::Result<(), OtherError>
4848
{
4949
}
5050

@@ -155,6 +155,7 @@ fn obtain_binary_cache_or_github(
155155
let mut intermediate = Box::new(Cursor::new(Vec::<u8>::new()));
156156
transform_data_fn(Box::new(reader_binenc), Box::new(&mut intermediate))
157157
.map_err(Error::TransformError)?;
158+
intermediate.seek(SeekFrom::Start(0_u64))?;
158159
intermediate
159160
} else {
160161
Box::new(reader_binenc)
@@ -355,8 +356,8 @@ impl BinaryBuilder {
355356
.open(&tokenizer_out)?,
356357
);
357358
if let Some(ref transform_data_fn) = self.transform_data_fn {
358-
let mut transfer_buffer_rules = Cursor::new(Vec::new());
359-
let mut transfer_buffer_tokenizer = Cursor::new(Vec::new());
359+
let mut transfer_buffer_rules = Vec::new();
360+
let mut transfer_buffer_tokenizer = Vec::new();
360361

361362
compile::compile(
362363
build_dir,
@@ -365,10 +366,13 @@ impl BinaryBuilder {
365366
)
366367
.map_err(Error::CollationFailed)?;
367368

368-
transform_data_fn(Box::new(transfer_buffer_rules), Box::new(rules_sink))
369+
assert_ne!(transfer_buffer_rules.len(), 0);
370+
assert_ne!(transfer_buffer_tokenizer.len(), 0);
371+
372+
transform_data_fn(Box::new(&mut transfer_buffer_rules.as_slice()), Box::new(rules_sink))
369373
.map_err(Error::TransformError)?;
370374
transform_data_fn(
371-
Box::new(transfer_buffer_tokenizer),
375+
Box::new(&mut transfer_buffer_tokenizer.as_slice()),
372376
Box::new(tokenizer_sink),
373377
)
374378
.map_err(Error::TransformError)?;
@@ -652,7 +656,9 @@ mod tests {
652656
.join(Path::new(&tokenizer_filename("en")))
653657
.with_extension("bin.gz");
654658
assert!(tokenizer_path.exists());
655-
smush::decode(&fs::read(tokenizer_path)?, smush::Codec::Gzip).unwrap();
659+
let decoded = smush::decode(&fs::read(tokenizer_path)?, smush::Codec::Gzip).unwrap();
660+
661+
let _ = nlprule_030::Tokenizer::new_from(&mut decoded.as_slice()).unwrap();
656662

657663
Ok(())
658664
}
@@ -700,6 +706,8 @@ mod tests {
700706
let mut decoded = Vec::new();
701707
decoder.read_to_end(&mut decoded).unwrap();
702708

709+
let _ = nlprule_030::Rules::new_from(&mut decoded.as_slice()).unwrap();
710+
703711
Ok(())
704712
}
705713

@@ -763,15 +771,9 @@ mod tests {
763771
let rules_path = tempdir
764772
.join(Path::new(&rules_filename("en")))
765773
.with_extension("bin");
766-
assert!(rules_path.exists());
774+
assert!(rules_path.is_file());
767775

768-
// The following will always fail since the versions will mismatch and rebuilding does not make sense
769-
// `get_build_dir` is tested separately
770-
//
771-
// ```rust,no_run
772-
// let _ = nlprule::Rules::new(rules_path)
773-
// .map_err(|e| Error::ValidationFailed("en".to_owned(), Binary::Rules, e))?;
774-
// ```
776+
let _ = nlprule_030::Rules::new(rules_path).unwrap();
775777
Ok(())
776778
}
777779
}

0 commit comments

Comments
 (0)