Skip to content

Commit 4bdb85d

Browse files
committed
Modernise PngPredictor and refactor LzwFilter and FlateFilter to reduce memory allocation
1 parent f84f2ac commit 4bdb85d

File tree

4 files changed

+369
-792
lines changed

4 files changed

+369
-792
lines changed

src/UglyToad.PdfPig/Filters/FlateFilter.cs

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -40,18 +40,10 @@ public Memory<byte> Decode(Memory<byte> input, DictionaryToken streamDictionary,
4040

4141
try
4242
{
43-
var decompressed = Decompress(input);
44-
45-
if (predictor == -1)
46-
{
47-
return decompressed;
48-
}
49-
5043
var colors = Math.Min(parameters.GetIntOrDefault(NameToken.Colors, DefaultColors), 32);
5144
var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent);
5245
var columns = parameters.GetIntOrDefault(NameToken.Columns, DefaultColumns);
53-
54-
return PngPredictor.Decode(decompressed, predictor, colors, bitsPerComponent, columns);
46+
return Decompress(input, predictor, colors, bitsPerComponent, columns);
5547
}
5648
catch
5749
{
@@ -61,10 +53,9 @@ public Memory<byte> Decode(Memory<byte> input, DictionaryToken streamDictionary,
6153
return input;
6254
}
6355

64-
private static byte[] Decompress(Memory<byte> input)
56+
private static Memory<byte> Decompress(Memory<byte> input, int predictor, int colors, int bitsPerComponent, int columns)
6557
{
6658
using (var memoryStream = MemoryHelper.AsReadOnlyMemoryStream(input))
67-
using (var output = new MemoryStream())
6859
{
6960
// The first 2 bytes are the header which DeflateStream does not support.
7061
memoryStream.ReadByte();
@@ -73,8 +64,17 @@ private static byte[] Decompress(Memory<byte> input)
7364
try
7465
{
7566
using (var deflate = new DeflateStream(memoryStream, CompressionMode.Decompress))
67+
using (var output = new MemoryStream((int)(input.Length * 1.5)))
68+
using (var f = PngPredictor.WrapPredictor(output, predictor, colors, bitsPerComponent, columns))
7669
{
77-
deflate.CopyTo(output);
70+
deflate.CopyTo(f);
71+
f.Flush();
72+
73+
if (output.TryGetBuffer(out var segment))
74+
{
75+
return segment.AsMemory();
76+
}
77+
7878
return output.ToArray();
7979
}
8080
}

src/UglyToad.PdfPig/Filters/LzwFilter.cs

Lines changed: 70 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ namespace UglyToad.PdfPig.Filters
55
using System;
66
using System.Collections.Generic;
77
using Lzw;
8+
using System.IO;
89
using Tokens;
910
using Util;
1011

@@ -37,105 +38,106 @@ public Memory<byte> Decode(Memory<byte> input, DictionaryToken streamDictionary,
3738

3839
var earlyChange = parameters.GetIntOrDefault(NameToken.EarlyChange, 1);
3940

40-
if (predictor > 1)
41+
var colors = Math.Min(parameters.GetIntOrDefault(NameToken.Colors, DefaultColors), 32);
42+
var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent);
43+
var columns = parameters.GetIntOrDefault(NameToken.Columns, DefaultColumns);
44+
45+
return Decode(input.Span, earlyChange == 1, predictor, colors, bitsPerComponent, columns);
46+
}
47+
48+
private static Memory<byte> Decode(ReadOnlySpan<byte> input, bool isEarlyChange, int predictor, int colors, int bitsPerComponent, int columns)
49+
{
50+
using (var output = new MemoryStream((int)(input.Length * 1.5))) // A guess.
51+
using (var result = PngPredictor.WrapPredictor(output, predictor, colors, bitsPerComponent, columns))
4152
{
42-
var decompressed = Decode(input.Span, earlyChange == 1);
53+
var table = GetDefaultTable();
4354

44-
var colors = Math.Min(parameters.GetIntOrDefault(NameToken.Colors, DefaultColors), 32);
45-
var bitsPerComponent = parameters.GetIntOrDefault(NameToken.BitsPerComponent, DefaultBitsPerComponent);
46-
var columns = parameters.GetIntOrDefault(NameToken.Columns, DefaultColumns);
55+
var codeBits = 9;
4756

48-
return PngPredictor.Decode(decompressed, predictor, colors, bitsPerComponent, columns);
49-
}
57+
var data = new BitStream(input);
5058

51-
return Decode(input.Span, earlyChange == 1);
52-
}
59+
var codeOffset = isEarlyChange ? 0 : 1;
5360

54-
private static byte[] Decode(ReadOnlySpan<byte> input, bool isEarlyChange)
55-
{
56-
// A guess.
57-
var result = new List<byte>((int)(input.Length * 1.5));
61+
var previous = -1;
5862

59-
var table = GetDefaultTable();
63+
while (true)
64+
{
65+
var next = data.Get(codeBits);
6066

61-
var codeBits = 9;
67+
if (next == EodMarker)
68+
{
69+
break;
70+
}
6271

63-
var data = new BitStream(input);
72+
if (next == ClearTable)
73+
{
74+
table = GetDefaultTable();
75+
previous = -1;
76+
codeBits = 9;
77+
continue;
78+
}
6479

65-
var codeOffset = isEarlyChange ? 0 : 1;
80+
if (table.TryGetValue(next, out var b))
81+
{
82+
result.Write(b,0, b.Length);
6683

67-
var previous = -1;
84+
if (previous >= 0)
85+
{
86+
var lastSequence = table[previous];
6887

69-
while (true)
70-
{
71-
var next = data.Get(codeBits);
88+
var newSequence = new byte[lastSequence.Length + 1];
7289

73-
if (next == EodMarker)
74-
{
75-
break;
76-
}
77-
78-
if (next == ClearTable)
79-
{
80-
table = GetDefaultTable();
81-
previous = -1;
82-
codeBits = 9;
83-
continue;
84-
}
90+
Array.Copy(lastSequence, newSequence, lastSequence.Length);
8591

86-
if (table.TryGetValue(next, out var b))
87-
{
88-
result.AddRange(b);
92+
newSequence[lastSequence.Length] = b[0];
8993

90-
if (previous >= 0)
94+
table[table.Count] = newSequence;
95+
}
96+
}
97+
else
9198
{
9299
var lastSequence = table[previous];
93100

94101
var newSequence = new byte[lastSequence.Length + 1];
95102

96103
Array.Copy(lastSequence, newSequence, lastSequence.Length);
97104

98-
newSequence[lastSequence.Length] = b[0];
105+
newSequence[lastSequence.Length] = lastSequence[0];
99106

107+
result.Write(newSequence, 0, newSequence.Length);
108+
100109
table[table.Count] = newSequence;
101110
}
102-
}
103-
else
104-
{
105-
var lastSequence = table[previous];
106-
107-
var newSequence = new byte[lastSequence.Length + 1];
108-
109-
Array.Copy(lastSequence, newSequence, lastSequence.Length);
110111

111-
newSequence[lastSequence.Length] = lastSequence[0];
112+
previous = next;
112113

113-
result.AddRange(newSequence);
114-
115-
table[table.Count] = newSequence;
114+
if (table.Count >= ElevenBitBoundary + codeOffset)
115+
{
116+
codeBits = 12;
117+
}
118+
else if (table.Count >= TenBitBoundary + codeOffset)
119+
{
120+
codeBits = 11;
121+
}
122+
else if (table.Count >= NineBitBoundary + codeOffset)
123+
{
124+
codeBits = 10;
125+
}
126+
else
127+
{
128+
codeBits = 9;
129+
}
116130
}
117-
118-
previous = next;
119131

120-
if (table.Count >= ElevenBitBoundary + codeOffset)
121-
{
122-
codeBits = 12;
123-
}
124-
else if (table.Count >= TenBitBoundary + codeOffset)
125-
{
126-
codeBits = 11;
127-
}
128-
else if (table.Count >= NineBitBoundary + codeOffset)
129-
{
130-
codeBits = 10;
131-
}
132-
else
132+
result.Flush();
133+
134+
if (output.TryGetBuffer(out var segment))
133135
{
134-
codeBits = 9;
136+
return segment.AsMemory();
135137
}
138+
139+
return output.ToArray();
136140
}
137-
138-
return result.ToArray();
139141
}
140142

141143
private static Dictionary<int, byte[]> GetDefaultTable()

0 commit comments

Comments
 (0)