Skip to content

Commit b4a8c25

Browse files
abimaelmartell authored and J-F-Liu committed
fix(parser): accept trailing space before EOL in xref and startxref lines
Some PDF generators emit trailing spaces on structural lines like "xref \n" and "startxref \n". The xref subsection header already handled this with opt(tag(" ")), but the xref keyword and startxref keyword parsers did not, causing Xref(Start) errors on valid PDFs.
1 parent 0526740 commit b4a8c25

File tree

1 file changed

+22
-2
lines changed

1 file changed

+22
-2
lines changed

src/parser/mod.rs

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ fn xref(input: ParserInput) -> NomResult<Xref> {
458458
);
459459

460460
delimited(
461-
pair(tag(&b"xref"[..]), eol),
461+
pair(tag(&b"xref"[..]), preceded(opt(tag(&b" "[..])), eol)),
462462
fold_many1(
463463
xref_section,
464464
|| -> Xref { Xref::new(0, XrefType::CrossReferenceTable) },
@@ -512,7 +512,7 @@ pub fn xref_and_trailer(input: ParserInput, reader: &Reader) -> crate::Result<(X
512512

513513
pub fn xref_start(input: ParserInput) -> Option<i64> {
514514
strip_nom(delimited(
515-
pair(tag(&b"startxref"[..]), eol),
515+
pair(tag(&b"startxref"[..]), preceded(opt(tag(&b" "[..])), eol)),
516516
trim_spaces(integer),
517517
(eol, tag(&b"%%EOF"[..]), space),
518518
).parse(input))
@@ -903,4 +903,24 @@ EI";
903903
b"00000z0z00zzz00z0zzz0zzzEI aazazaazzzaazazzzazzz"
904904
)
905905
}
906+
907+
#[test]
908+
fn xref_trailing_space_after_keyword() {
909+
// Some PDF generators emit "xref \n" with a trailing space.
910+
let input = b"xref \n0 3\n0000000000 65535 f \n0000000017 00000 n \n0000000081 00000 n \ntrailer\n<</Size 3/Root 1 0 R>>\nstartxref\n175\n%%EOF\n";
911+
match xref(test_span(input)) {
912+
Ok((_, re)) => assert_eq!(re.entries.len(), 2),
913+
Err(err) => panic!("xref with trailing space should parse: {:?}", err),
914+
}
915+
}
916+
917+
#[test]
918+
fn startxref_trailing_space_after_keyword() {
919+
// Some PDF generators emit "startxref \n" with a trailing space.
920+
let input = b"startxref \n135738\n%%EOF\n";
921+
match xref_start(test_span(input)) {
922+
Some(num) => assert_eq!(num, 135738),
923+
None => panic!("startxref with trailing space should parse"),
924+
}
925+
}
906926
}

0 commit comments

Comments
 (0)