Skip to content

Commit 3592fc8

Browse files
rhuijbenBobLd
authored and committed
Use zlib information to verify compressed content before using it
1 parent c9034f9 commit 3592fc8

File tree

12 files changed

+299
-147
lines changed

12 files changed

+299
-147
lines changed

src/UglyToad.PdfPig.Fonts/UglyToad.PdfPig.Fonts.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
<GenerateDocumentationFile>true</GenerateDocumentationFile>
88
<SignAssembly>true</SignAssembly>
99
<AssemblyOriginatorKeyFile>..\pdfpig.snk</AssemblyOriginatorKeyFile>
10+
<Nullable>annotations</Nullable>
1011
</PropertyGroup>
1112
<ItemGroup>
1213
<None Remove="Resources\AdobeFontMetrics\*" />

src/UglyToad.PdfPig.Tests/ContentStream/IndirectReferenceTests.cs

Lines changed: 54 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
namespace UglyToad.PdfPig.Tests.ContentStream
22
{
33
using PdfPig.Core;
4+
using System.Globalization;
45

56
public class IndirectReferenceTests
67
{
@@ -33,50 +34,59 @@ public void TwoIndirectReferenceEqual()
3334
[Fact]
3435
public void IndirectReferenceHashTest()
3536
{
36-
var reference0 = new IndirectReference(1574, 690);
37-
Assert.Equal(1574, reference0.ObjectNumber);
38-
Assert.Equal(690, reference0.Generation);
39-
40-
var reference1 = new IndirectReference(-1574, 690);
41-
Assert.Equal(-1574, reference1.ObjectNumber);
42-
Assert.Equal(690, reference1.Generation);
43-
44-
var reference2 = new IndirectReference(58949797283757, 16);
45-
Assert.Equal(58949797283757, reference2.ObjectNumber);
46-
Assert.Equal(16, reference2.Generation);
47-
48-
var reference3 = new IndirectReference(-58949797283757, ushort.MaxValue);
49-
Assert.Equal(-58949797283757, reference3.ObjectNumber);
50-
Assert.Equal(ushort.MaxValue, reference3.Generation);
51-
52-
var reference4 = new IndirectReference(140737488355327, ushort.MaxValue);
53-
Assert.Equal(140737488355327, reference4.ObjectNumber);
54-
Assert.Equal(ushort.MaxValue, reference4.Generation);
55-
56-
var reference5 = new IndirectReference(-140737488355327, ushort.MaxValue);
57-
Assert.Equal(-140737488355327, reference5.ObjectNumber);
58-
Assert.Equal(ushort.MaxValue, reference5.Generation);
59-
60-
var ex0 = Assert.Throws<ArgumentOutOfRangeException>(() => new IndirectReference(140737488355328, 0));
61-
Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex0.Message);
62-
var ex1 = Assert.Throws<ArgumentOutOfRangeException>(() => new IndirectReference(-140737488355328, 0));
63-
Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex1.Message);
64-
65-
var ex2 = Assert.Throws<ArgumentOutOfRangeException>(() => new IndirectReference(1574, -1));
66-
Assert.StartsWith("Generation number must not be a negative value.", ex2.Message);
67-
68-
// We make sure object number is still correct even if generation is not
69-
var reference6 = new IndirectReference(1574, int.MaxValue);
70-
Assert.Equal(1574, reference6.ObjectNumber);
71-
72-
var reference7 = new IndirectReference(-1574, ushort.MaxValue + 10);
73-
Assert.Equal(-1574, reference7.ObjectNumber);
74-
75-
var reference9 = new IndirectReference(-140737488355327, ushort.MaxValue + 10);
76-
Assert.Equal(-140737488355327, reference9.ObjectNumber);
77-
78-
var reference10 = new IndirectReference(140737488355327, ushort.MaxValue * 10);
79-
Assert.Equal(140737488355327, reference10.ObjectNumber);
37+
CultureInfo lastCulture = CultureInfo.CurrentCulture;
38+
CultureInfo.CurrentCulture = new CultureInfo("en-US");
39+
try
40+
{
41+
var reference0 = new IndirectReference(1574, 690);
42+
Assert.Equal(1574, reference0.ObjectNumber);
43+
Assert.Equal(690, reference0.Generation);
44+
45+
var reference1 = new IndirectReference(-1574, 690);
46+
Assert.Equal(-1574, reference1.ObjectNumber);
47+
Assert.Equal(690, reference1.Generation);
48+
49+
var reference2 = new IndirectReference(58949797283757, 16);
50+
Assert.Equal(58949797283757, reference2.ObjectNumber);
51+
Assert.Equal(16, reference2.Generation);
52+
53+
var reference3 = new IndirectReference(-58949797283757, ushort.MaxValue);
54+
Assert.Equal(-58949797283757, reference3.ObjectNumber);
55+
Assert.Equal(ushort.MaxValue, reference3.Generation);
56+
57+
var reference4 = new IndirectReference(140737488355327, ushort.MaxValue);
58+
Assert.Equal(140737488355327, reference4.ObjectNumber);
59+
Assert.Equal(ushort.MaxValue, reference4.Generation);
60+
61+
var reference5 = new IndirectReference(-140737488355327, ushort.MaxValue);
62+
Assert.Equal(-140737488355327, reference5.ObjectNumber);
63+
Assert.Equal(ushort.MaxValue, reference5.Generation);
64+
65+
var ex0 = Assert.Throws<ArgumentOutOfRangeException>(() => new IndirectReference(140737488355328, 0));
66+
Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex0.Message);
67+
var ex1 = Assert.Throws<ArgumentOutOfRangeException>(() => new IndirectReference(-140737488355328, 0));
68+
Assert.StartsWith("Object number must be between -140,737,488,355,327 and 140,737,488,355,327.", ex1.Message);
69+
70+
var ex2 = Assert.Throws<ArgumentOutOfRangeException>(() => new IndirectReference(1574, -1));
71+
Assert.StartsWith("Generation number must not be a negative value.", ex2.Message);
72+
73+
// We make sure object number is still correct even if generation is not
74+
var reference6 = new IndirectReference(1574, int.MaxValue);
75+
Assert.Equal(1574, reference6.ObjectNumber);
76+
77+
var reference7 = new IndirectReference(-1574, ushort.MaxValue + 10);
78+
Assert.Equal(-1574, reference7.ObjectNumber);
79+
80+
var reference9 = new IndirectReference(-140737488355327, ushort.MaxValue + 10);
81+
Assert.Equal(-140737488355327, reference9.ObjectNumber);
82+
83+
var reference10 = new IndirectReference(140737488355327, ushort.MaxValue * 10);
84+
Assert.Equal(140737488355327, reference10.ObjectNumber);
85+
}
86+
finally
87+
{
88+
CultureInfo.CurrentCulture = lastCulture;
89+
}
8090
}
8191

8292
[Fact]

src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ public void Issue1122()
132132
var path = IntegrationHelpers.GetSpecificTestDocumentPath("StackOverflow_Issue_1122.pdf");
133133

134134
var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
135-
Assert.StartsWith("Reached maximum search depth while getting indirect reference.", ex.Message);
135+
Assert.Equal("The root object in the trailer did not resolve to a readable dictionary.", ex.Message);
136136
}
137137

138138
[Fact]
@@ -191,7 +191,7 @@ public void Issue1050()
191191
{
192192
var path = IntegrationHelpers.GetSpecificTestDocumentPath("SpookyPass.pdf");
193193
var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true }));
194-
Assert.Equal("Avoiding infinite recursion in ObjectLocationProvider.TryGetOffset() as 'offset' and 'reference.ObjectNumber' have the same value and opposite signs.", ex.Message);
194+
Assert.Equal("The root object in the trailer did not resolve to a readable dictionary.", ex.Message);
195195
}
196196

197197
[Fact]
@@ -356,7 +356,8 @@ public void Issue953_IntOverflow()
356356
using (var document = PdfDocument.Open(path, new ParsingOptions() { UseLenientParsing = true, SkipMissingFonts = true }))
357357
{
358358
var page = document.GetPage(13);
359-
Assert.Throws<OverflowException>(() => DocstrumBoundingBoxes.Instance.GetBlocks(page.GetWords()));
359+
// This used to fail with an overflow exception when we failed to validate the zlib encoded data
360+
Assert.NotNull(DocstrumBoundingBoxes.Instance.GetBlocks(page.GetWords()));
360361
}
361362
}
362363

src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,8 @@ 0000004385 00000 n
109109
%%EOF
110110
""";
111111

112-
if (Environment.NewLine == "\n")
113-
{
114-
content = content.Replace("\n", "\r\n");
115-
}
112+
// Normalize all line endings to "\r\n", regardless of whether this source file was saved with "\r\n" or "\n"
113+
content = content.Replace("\r\n", "\n").Replace("\n", "\r\n");
116114

117115
var ib = StringBytesTestConverter.Convert(content, false);
118116

src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
<AssemblyOriginatorKeyFile>..\pdfpig.snk</AssemblyOriginatorKeyFile>
1111
<RuntimeFrameworkVersion Condition="'$(TargetFramework)'=='netcoreapp2.1'">2.1.30</RuntimeFrameworkVersion>
1212
<ImplicitUsings>enable</ImplicitUsings>
13+
<Nullable>annotations</Nullable>
1314
</PropertyGroup>
1415

1516
<ItemGroup>
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
namespace UglyToad.PdfPig.Filters
{
    using System;
    using System.IO;

    /// <summary>
    /// A pass-through <see cref="Stream"/> wrapper that keeps a running Adler-32 checksum of
    /// every byte that is read from, or written to, the wrapped stream.
    /// </summary>
    /// <remarks>
    /// The stream is forward-only: seeking, setting the position and setting the length are not supported.
    /// Disposing (or closing) this stream also disposes the wrapped stream.
    /// </remarks>
    internal sealed class Adler32ChecksumStream : Stream
    {
        /// <summary>
        /// The Adler-32 modulus (largest prime below 2^16).
        /// </summary>
        private const uint ModAdler = 65521;

        private readonly Stream underlyingStream;

        /// <summary>
        /// Creates a checksum stream on top of <paramref name="writeStream"/>.
        /// </summary>
        /// <param name="writeStream">The stream to forward all reads and writes to.</param>
        /// <exception cref="ArgumentNullException"><paramref name="writeStream"/> is null.</exception>
        public Adler32ChecksumStream(Stream writeStream)
        {
            underlyingStream = writeStream ?? throw new ArgumentNullException(nameof(writeStream));
        }

        /// <inheritdoc />
        public override bool CanRead => underlyingStream.CanRead;

        /// <summary>
        /// Always <see langword="false"/>; the checksum is only meaningful for sequential access.
        /// </summary>
        public override bool CanSeek => false;

        /// <inheritdoc />
        public override bool CanWrite => underlyingStream.CanWrite;

        /// <inheritdoc />
        public override long Length => underlyingStream.Length;

        /// <summary>
        /// Gets the position of the wrapped stream. Setting the position is not supported.
        /// </summary>
        /// <exception cref="NotSupportedException">The setter is called.</exception>
        public override long Position
        {
            get => underlyingStream.Position;
            set => throw new NotSupportedException();
        }

        /// <summary>
        /// The Adler-32 checksum of all bytes that passed through this stream so far.
        /// Starts at 1, the Adler-32 value of an empty sequence.
        /// </summary>
        public uint Checksum { get; private set; } = 1;

        /// <inheritdoc />
        public override void Flush()
        {
            underlyingStream.Flush();
        }

        /// <summary>
        /// Reads from the wrapped stream and adds the bytes actually read to the checksum.
        /// </summary>
        public override int Read(byte[] buffer, int offset, int count)
        {
            int n = underlyingStream.Read(buffer, offset, count);

            if (n > 0)
            {
                // Only the bytes that were really returned take part in the checksum.
                UpdateAdler(buffer.AsSpan(offset, n));
            }

            return n;
        }

        /// <summary>
        /// Not supported.
        /// </summary>
        /// <exception cref="NotSupportedException">Always.</exception>
        public override long Seek(long offset, SeekOrigin origin)
        {
            throw new NotSupportedException();
        }

        /// <summary>
        /// Not supported.
        /// </summary>
        /// <exception cref="NotSupportedException">Always.</exception>
        public override void SetLength(long value)
        {
            throw new NotSupportedException();
        }

        /// <summary>
        /// Writes to the wrapped stream and adds the written bytes to the checksum.
        /// </summary>
        public override void Write(byte[] buffer, int offset, int count)
        {
            // Write first: if the wrapped stream rejects the arguments the checksum stays untouched.
            underlyingStream.Write(buffer, offset, count);

            if (count > 0)
            {
                UpdateAdler(buffer.AsSpan(offset, count));
            }
        }

        /// <summary>
        /// Disposes the wrapped stream together with this stream.
        /// </summary>
        /// <remarks>
        /// Cleanup lives here rather than in an override of <see cref="Stream.Close"/>, as both
        /// <see cref="Stream.Close"/> and <see cref="Stream.Dispose()"/> end up in this method.
        /// </remarks>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                underlyingStream.Dispose();
            }

            base.Dispose(disposing);
        }

        /// <summary>
        /// Folds <paramref name="span"/> into the running checksum.
        /// </summary>
        private void UpdateAdler(ReadOnlySpan<byte> span)
        {
            // The checksum packs the two 16-bit running sums as (b << 16) | a.
            uint a = Checksum & 0xFFFF;
            uint b = (Checksum >> 16) & 0xFFFF;

            foreach (byte c in span)
            {
                a = (a + c) % ModAdler;
                b = (b + a) % ModAdler;
            }

            Checksum = (b << 16) | a;
        }
    }
}

src/UglyToad.PdfPig/Filters/FlateFilter.cs

Lines changed: 84 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
{
33
using Fonts;
44
using System;
5+
using System.Buffers.Binary;
56
using System.IO;
67
using System.IO.Compression;
78
using Tokens;
@@ -43,6 +44,15 @@ public Memory<byte> Decode(Memory<byte> input, DictionaryToken streamDictionary,
4344
var colors = Math.Min(parameters.GetIntOrDefault(NameToken.Colors, DefaultColors), 32);
4445
var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent);
4546
var columns = parameters.GetIntOrDefault(NameToken.Columns, DefaultColumns);
47+
48+
var length = parameters.GetIntOrDefault(NameToken.Length, -1);
49+
50+
if (length > 0 && length < input.Length)
51+
{
52+
// Truncates final "\r\n" or "\n" from source data if any. Fixes detecting where the adler checksum is. (Zlib uses framing for this)
53+
input = input.Slice(0, length);
54+
}
55+
4656
return Decompress(input, predictor, colors, bitsPerComponent, columns);
4757
}
4858
catch
@@ -55,29 +65,83 @@ public Memory<byte> Decode(Memory<byte> input, DictionaryToken streamDictionary,
5565

5666
private static Memory<byte> Decompress(Memory<byte> input, int predictor, int colors, int bitsPerComponent, int columns)
5767
{
58-
using (var memoryStream = MemoryHelper.AsReadOnlyMemoryStream(input))
68+
#if NET
69+
using var memoryStream = MemoryHelper.AsReadOnlyMemoryStream(input);
70+
try
5971
{
60-
// The first 2 bytes are the header which DeflateStream does not support.
61-
memoryStream.ReadByte();
62-
memoryStream.ReadByte();
63-
64-
try
72+
using (var zlib = new ZLibStream(memoryStream, CompressionMode.Decompress))
73+
using (var output = new MemoryStream((int)(input.Length * 1.5)))
74+
using (var f = PngPredictor.WrapPredictor(output, predictor, colors, bitsPerComponent, columns))
6575
{
66-
using (var deflate = new DeflateStream(memoryStream, CompressionMode.Decompress))
67-
using (var output = new MemoryStream((int)(input.Length * 1.5)))
68-
using (var f = PngPredictor.WrapPredictor(output, predictor, colors, bitsPerComponent, columns))
69-
{
70-
deflate.CopyTo(f);
71-
f.Flush();
76+
zlib.CopyTo(f);
77+
f.Flush();
7278

73-
return output.AsMemory();
74-
}
79+
return output.AsMemory();
80+
}
81+
}
82+
catch (InvalidDataException ex)
83+
{
84+
throw new CorruptCompressedDataException("Invalid Flate compressed stream encountered", ex);
85+
}
86+
#else
87+
// Ideally we would like to use the ZLibStream class but that is only available in .NET 5+.
88+
// We look at the raw data now
89+
// * First we have 2 bytes, specifying the type of compression
90+
// * Then we have the deflated data
91+
// * Then we have a 4 byte checksum (Adler32)
92+
93+
// Would be so nice to have zlib do the framing here... but the deflate stream already reads data from the stream that we need.
94+
95+
using var memoryStream = MemoryHelper.AsReadOnlyMemoryStream(input.Slice(2, input.Length - 2 /* Header */ - 4 /* Checksum */));
96+
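// The first 2 bytes are the zlib header, which DeflateStream can't handle; the last 4 bytes are the Adler-32 checksum. Both are excluded from the slice above. NOTE(review): that slice assumes input.Length >= 6 - confirm shorter inputs are handled by the caller.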
97+
var adlerBytes = input.Slice(input.Length - 4, 4).Span;
98+
uint expected = BinaryPrimitives.ReadUInt32BigEndian(adlerBytes);
99+
uint altExpected = expected;
100+
101+
// Sometimes the data ends with "\r\n", "\r" or "\n" and we don't know whether those bytes are part of the zlib checksum or trailing padding.
102+
// Ideally this would have been removed by the caller from the provided length...
103+
if (adlerBytes[3] == '\n' || adlerBytes[3] == '\r')
104+
{
105+
if (adlerBytes[3] == '\n' && adlerBytes[2] == '\r')
106+
{
107+
// Now we don't know which value is the good one. The value could be ok, or padding.
108+
// Let's allow both values for now. Allowing two out of 2^32 is much better than allowing everything.
109+
adlerBytes = input.Slice(input.Length - 6, 4).Span;
75110
}
76-
catch (InvalidDataException ex)
111+
else
77112
{
78-
throw new CorruptCompressedDataException("Invalid Flate compressed stream encountered", ex);
113+
// Same but now for just '\n' or '\r' instead of '\r\n'
114+
adlerBytes = input.Slice(input.Length - 5, 4).Span;
79115
}
116+
117+
altExpected = BinaryPrimitives.ReadUInt32BigEndian(adlerBytes);
118+
}
119+
120+
121+
try
122+
{
123+
using (var deflate = new DeflateStream(memoryStream, CompressionMode.Decompress))
124+
using (var adlerStream = new Adler32ChecksumStream(deflate))
125+
using (var output = new MemoryStream((int)(input.Length * 1.5)))
126+
using (var f = PngPredictor.WrapPredictor(output, predictor, colors, bitsPerComponent, columns))
127+
{
128+
adlerStream.CopyTo(f);
129+
f.Flush();
130+
131+
uint actual = adlerStream.Checksum;
132+
if (expected != actual && altExpected != actual)
133+
{
134+
throw new CorruptCompressedDataException("Flate stream has invalid checksum");
135+
}
136+
137+
return output.AsMemory();
138+
}
139+
}
140+
catch (InvalidDataException ex)
141+
{
142+
throw new CorruptCompressedDataException("Invalid Flate compressed stream encountered", ex);
80143
}
144+
#endif
81145
}
82146

83147
/// <inheritdoc />
@@ -95,9 +159,10 @@ public byte[] Encode(Stream input, DictionaryToken streamDictionary, int index)
95159

96160
using (var compressStream = new MemoryStream())
97161
using (var compressor = new DeflateStream(compressStream, CompressionLevel.Fastest))
162+
using (var adlerStream = new Adler32ChecksumStream(compressor))
98163
{
99-
compressor.Write(data, 0, data.Length);
100-
compressor.Close();
164+
adlerStream.Write(data, 0, data.Length);
165+
adlerStream.Close();
101166

102167
var compressed = compressStream.ToArray();
103168

@@ -111,7 +176,7 @@ public byte[] Encode(Stream input, DictionaryToken streamDictionary, int index)
111176
Array.Copy(compressed, 0, result, headerLength, compressed.Length);
112177

113178
// Write Checksum of raw data.
114-
var checksum = Adler32Checksum.Calculate(data);
179+
var checksum = adlerStream.Checksum;
115180

116181
var offset = headerLength + compressed.Length;
117182

0 commit comments

Comments
 (0)