Skip to content

Commit 83d6fc6

Browse files
EliotJonesBobLd
authored and committed
allow missing catalog type definition for catalog dictionary
As long as there is a pages entry, we accept this in lenient parsing mode. This fixes document 006705.pdf in the corpus, which had '/calalog' as the dictionary entry. Also adds a test for some weird content stream content in 0006324.pdf, where numbers seem to get split on a decimal place in the content stream; the test only checks that our parser doesn't hard crash.
1 parent febfa4d commit 83d6fc6

File tree

2 files changed

+43
-1
lines changed

2 files changed

+43
-1
lines changed

src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,47 @@ public void SkipsCommentsInStreams()
261261
Assert.Equal(3, tokens.OfType<DictionaryToken>().Count());
262262
}
263263

264+
// Smoke test: feeds a content-stream fragment containing oddly split numeric
// operands (e.g. "-0. .61", "572. .836") through CoreTokenScanner and drains
// every token. The test contains no assertions, so it passes as long as
// scanning does not throw.
[Fact]
265+
public void Document006324Test()
266+
{
267+
const string content =
268+
"""
269+
q
270+
1 0 0 1 248.6304 572.546 cm
271+
0 0 m
272+
0.021 -0.007 l
273+
3 -0.003 -0.01 0 0 0 c
274+
f
275+
Q
276+
q
277+
1 0 0 1 2489394 57249855 cm
278+
0 0 m
279+
-0.046 -0.001 -0.609 0.029 -0.286 -0.014 c
280+
-02.61 -0.067 -0.286 -0. .61 -0 0 c
281+
f
282+
Q
283+
q
284+
1 0 0 1 24862464 572. .836 cm
285+
0 0 m
286+
0.936 -0.029 l
287+
0.038 -0.021 0.55 -0.014 0 0 c
288+
f
289+
Q
290+
""";
291+
292+
// Collected only so that every token is materialised; the list is never inspected.
var tokens = new List<IToken>();
293+
294+
// NOTE(review): the second positional argument is an unnamed bool - confirm its
// meaning against the CoreTokenScanner constructor. isStream is passed as true.
var scanner = new CoreTokenScanner(
295+
StringBytesTestConverter.Convert(content, false).Bytes,
296+
true,
297+
isStream: true);
298+
299+
// Drain the scanner to the end of the input.
while (scanner.MoveNext())
300+
{
301+
tokens.Add(scanner.CurrentToken);
302+
}
303+
}
304+
264305
private static void AssertCorrectToken<T, TData>(IToken token, TData expected) where T : IDataToken<TData>
265306
{
266307
var cast = Assert.IsType<T>(token);

src/UglyToad.PdfPig/Parser/CatalogFactory.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ public static Catalog Create(IndirectReference rootReference, DictionaryToken di
1919
throw new ArgumentNullException(nameof(dictionary));
2020
}
2121

22-
if (dictionary.TryGet(NameToken.Type, out var type) && !ReferenceEquals(type, NameToken.Catalog))
22+
if (dictionary.TryGet(NameToken.Type, out var type) && !ReferenceEquals(type, NameToken.Catalog)
23+
&& !isLenientParsing)
2324
{
2425
throw new PdfDocumentFormatException($"The type of the catalog dictionary was not Catalog: {dictionary}.");
2526
}

0 commit comments

Comments
 (0)