J-F-Liu · J-F-Liu · Mar 22, 2026 · Mar 19, 2026
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
@@ -409,15 +409,28 @@ fn _indirect_object<'a>(
     Ok((object_id, object))
 }
 
-pub fn header(input: ParserInput) -> Option<String> {
-    strip_nom(map_res(
-        delimited(
-            tag(&b"%PDF-"[..]),
+pub fn header(input: ParserInput, strict: bool) -> Option<String> {
+    // Split the header line into the version token (ASCII digits and '.', e.g.
+    // "1.7") and any trailing bytes before the EOL.  Some PDF generators (e.g.
+    // ImageMill) place binary marker bytes there that are not valid UTF-8.
+    // Strict mode rejects ANY trailing byte (even a space); lenient mode drops them.
+    let (_, (version_raw, trailing)) = delimited(
+        tag(&b"%PDF-"[..]),
+        pair(
+            take_while(|c: u8| c.is_ascii_digit() || c == b'.'),
             take_while(|c: u8| !b"\r\n".contains(&c)),
-            pair(eol, many0_count(comment)),
         ),
-        |v: ParserInput| str::from_utf8(&v).map(Into::into),
-    ).parse(input))
+        pair(eol, many0_count(comment)),
+    )
+    .parse(input)
+    .ok()?;
+
+    if strict && !trailing.is_empty() {
+        return None;
+    }
+
+    let version = str::from_utf8(&version_raw).ok()?.to_string();
+    Some(version)
 }
 
 pub fn binary_mark(input: ParserInput) -> Option<Vec<u8>> {
@@ -802,6 +815,49 @@ startxref
         }
     }
 
+    #[test]
+    fn header_standard() {
+        // Baseline: LF-terminated version line, then a '%'-prefixed line of high-bit bytes.
+        let input = b"%PDF-1.7\n%\xe2\xe3\xcf\xd3\n";
+        assert_eq!(header(test_span(input), false), Some("1.7".to_string()));
+    }
+
+    #[test]
+    fn header_with_binary_bytes_on_same_line() {
+        // Some generators (e.g. ImageMill) put binary marker bytes directly after the
+        // version, with no newline or '%' first; lenient mode must still return "1.3".
+        let input = b"%PDF-1.3 \xb0\x9f\x92\x9c\x9f\xd4\xe0\xce\xd0\xd0\xd0\r1 0 obj\r";
+        assert_eq!(header(test_span(input), false), Some("1.3".to_string()));
+    }
+
+    #[test]
+    fn header_with_binary_bytes_strict_rejects() {
+        // Strict mode rejects any bytes between the version and the EOL, so a
+        // header with a space and binary bytes after "1.3" must yield None.
+        let input = b"%PDF-1.3 \xb0\x9f\x92\x9c\x9f\xd4\xe0\xce\xd0\xd0\xd0\r1 0 obj\r";
+        assert_eq!(header(test_span(input), true), None);
+    }
+
+    #[test]
+    fn header_cr_line_ending() {
+        // Bare CR as the EOL on both lines (CR-only endings are common in older PDFs)
+        let input = b"%PDF-1.3\r%\xe2\xe3\xcf\xd3\r";
+        assert_eq!(header(test_span(input), false), Some("1.3".to_string()));
+    }
+
+    #[test]
+    fn header_crlf_line_ending() {
+        // CRLF as the EOL on both lines (common on Windows-generated PDFs)
+        let input = b"%PDF-1.7\r\n%\xe2\xe3\xcf\xd3\r\n";
+        assert_eq!(header(test_span(input), false), Some("1.7".to_string()));
+    }
+
+    #[test]
+    fn header_pdf_2_0() {
+        let input = b"%PDF-2.0\n%\xe2\xe3\xcf\xd3\n"; // major version 2, not just 1.x
+        assert_eq!(header(test_span(input), false), Some("2.0".to_string()));
+    }
+
     #[test]
     fn content_with_comments() {
         // It should be processed as usual but ignoring the comments

diff --git a/src/reader.rs b/src/reader.rs
@@ -500,7 +500,7 @@ impl Reader<'_> {
         self.buffer = &self.buffer[offset..];
 
         let version =
-            parser::header(ParserInput::new_extra(self.buffer, "header")).ok_or(ParseError::InvalidFileHeader)?;
+            parser::header(ParserInput::new_extra(self.buffer, "header"), self.strict).ok_or(ParseError::InvalidFileHeader)?;
 
         let xref_start = Self::get_xref_start(self.buffer)?;
         if xref_start > self.buffer.len() {
@@ -741,7 +741,7 @@ impl Reader<'_> {
         // The document structure can be expressed in PEG as:
         //   document <- header indirect_object* xref trailer xref_start
         let version =
-            parser::header(ParserInput::new_extra(self.buffer, "header")).ok_or(ParseError::InvalidFileHeader)?;
+            parser::header(ParserInput::new_extra(self.buffer, "header"), self.strict).ok_or(ParseError::InvalidFileHeader)?;
 
         //The binary_mark is in line 2 after the pdf version. If at other line number, then will be declared as invalid pdf.
         if let Some(pos) = self.buffer.iter().position(|&byte| byte == b'\n') {

diff --git a/tests/load_options_test.rs b/tests/load_options_test.rs
@@ -1,4 +1,4 @@
 use lopdf::{Document, LoadOptions, Object};

 #[cfg(not(feature = "async"))]
 mod sync_tests {
@@ -187,4 +187,36 @@
         let result = Document::load_with_options("nonexistent.pdf", LoadOptions::default());
         assert!(result.is_err());
     }
+
+    #[test]
+    fn strict_rejects_binary_bytes_on_header_line() {
+        // Minimal PDF-like buffer: "%PDF-1.3", then a space and binary bytes before the CR.
+        // Strict mode should reject this with InvalidFileHeader.
+        let buf = b"%PDF-1.3 \xb0\x9f\x92\x9c\r%%EOF\r";
+        let result = Document::load_mem_with_options(
+            buf,
+            LoadOptions { strict: true, ..Default::default() },
+        );
+        let err = result.unwrap_err().to_string();
+        assert!(
+            err.contains("invalid file header"),
+            "expected InvalidFileHeader, got: {err}"
+        );
+    }
+
+    #[test]
+    fn lenient_accepts_binary_bytes_on_header_line() {
+        // Same buffer, but lenient (default) mode should accept the header.  Loading
+        // is still expected to fail later because the rest isn't a real PDF, so only
+        // the kind of error is checked; an Ok result also passes this test.
+        let buf = b"%PDF-1.3 \xb0\x9f\x92\x9c\r%%EOF\r";
+        let result = Document::load_mem_with_options(buf, LoadOptions::default());
+        // The error should NOT be InvalidFileHeader — the header parsed fine.
+        if let Err(e) = &result {
+            assert!(
+                !e.to_string().contains("invalid file header"),
+                "lenient mode should accept binary bytes on header line, got: {e}"
+            );
+        }
+    }
 }