Skip to content

Commit e7eb667

Browse files
abimaelmartell authored and J-F-Liu committed
fix(stream): return raw content for uncompressed streams in decompressed_content()
When a stream has no /Filter entry, decompressed_content() would fail because filters() propagates the dictionary lookup error. Unfiltered streams are already uncompressed, so the correct behavior is to return the raw content as-is. This fixes text extraction from Form XObjects generated by pdfrw and similar tools that use uncompressed streams (no /Filter, just raw content like "/FullPage Do").
1 parent afe79a4 commit e7eb667

File tree

1 file changed

+20
-1
lines changed

1 file changed

+20
-1
lines changed

src/object.rs

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -678,7 +678,11 @@ impl Stream {
678678

679679
pub fn decompressed_content(&self) -> Result<Vec<u8>> {
680680
let params = self.dict.get(b"DecodeParms").and_then(Object::as_dict).ok();
681-
let filters = self.filters()?;
681+
let filters = match self.filters() {
682+
Ok(f) => f,
683+
// No /Filter key means the stream is uncompressed
684+
Err(_) => return Ok(self.content.clone()),
685+
};
682686

683687
let mut input = self.content.as_slice();
684688
let mut output = vec![];
@@ -889,4 +893,19 @@ mod test {
889893
let result = Stream::decompress_zlib(&compressed, None).unwrap();
890894
assert_eq!(result, original);
891895
}
896+
897+
#[test]
898+
fn test_uncompressed_stream_returns_raw_content() {
899+
use crate::Dictionary;
900+
901+
// A stream with no /Filter should return its raw content from decompressed_content()
902+
let content = b"/FullPage Do
903+
".to_vec();
904+
let mut dict = Dictionary::new();
905+
dict.set("Length", content.len() as i64);
906+
let stream = Stream::new(dict, content.clone());
907+
908+
let result = stream.decompressed_content().expect("should succeed for uncompressed stream");
909+
assert_eq!(result, content);
910+
}
892911
}

0 commit comments

Comments
 (0)