Skip to content

Commit 46815a3

Browse files
committed
Fixed Xpress decompression
1 parent d549ffc commit 46815a3

File tree

6 files changed

+446
-327
lines changed

6 files changed

+446
-327
lines changed
Lines changed: 40 additions & 200 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
using System;
22
using System.Buffers.Binary;
3+
using System.Diagnostics;
34
using System.IO;
45

56
namespace DiscUtils.Compression;
67

78
public sealed class XpressLz77 : IBlockDecompressor
89
{
10+
public static XpressLz77 Default { get; } = new();
11+
912
int IBlockDecompressor.BlockSize { get; set; }
1013

1114
bool IBlockDecompressor.TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> output, out int decompressedSize)
@@ -24,8 +27,9 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
2427
// Ref: MS-XCA §2.3.4 Processing (literal/match flags in 32-bit chunks; 16-bit match token).
2528

2629
var expectedSize = output.Length;
27-
int src = 0;
30+
var src = 0;
2831
decompressedSize = 0;
32+
var cachedLenNibble = -1; // -1 = empty; otherwise 0..15
2933

3034
while (decompressedSize < expectedSize)
3135
{
@@ -35,17 +39,17 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
3539
}
3640

3741
// Flags are processed MSB → LSB (we write them, then consume from the high bit).
38-
uint flags = BinaryPrimitives.ReadUInt32LittleEndian(compressed.Slice(src, 4));
42+
var flags = BinaryPrimitives.ReadUInt32LittleEndian(compressed.Slice(src, 4));
3943
src += 4;
4044

41-
for (int i = 0; i < 32; i++)
45+
for (var i = 0; i < 32; i++)
4246
{
4347
if (decompressedSize >= expectedSize)
4448
{
4549
break; // Done
4650
}
4751

48-
bool isMatch = (flags & 0x8000_0000u) != 0;
52+
var isMatch = (flags & 0x8000_0000u) != 0;
4953
flags <<= 1;
5054

5155
if (!isMatch)
@@ -57,6 +61,7 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
5761
}
5862

5963
output[decompressedSize++] = compressed[src++];
64+
6065
continue;
6166
}
6267

@@ -66,11 +71,11 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
6671
return false;
6772
}
6873

69-
ushort token = BinaryPrimitives.ReadUInt16LittleEndian(compressed.Slice(src, 2));
74+
var token = BinaryPrimitives.ReadUInt16LittleEndian(compressed.Slice(src, 2));
7075
src += 2;
7176

72-
int matchOffset = ((token >> 3) & 0x1FFF) + 1; // 13 bits + bias
73-
int lenMinus3 = (token & 0x7); // 3 low bits
77+
var matchOffset = ((token >> 3) & 0x1FFF) + 1; // 13 bits + bias
78+
var lenMinus3 = (token & 0x7); // 3 low bits
7479
int matchLen;
7580

7681
if (lenMinus3 < 7)
@@ -79,46 +84,39 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
7984
}
8085
else
8186
{
82-
// Extended length path: the next nibble (4 bits) comes in a packed scheme.
83-
// MS-XCA describes a “half-byte” reuse; we follow the encoder’s layout:
84-
// First we read a single byte that contributes 4 or 8 bits depending on reuse;
85-
// Simpler to implement decoder-side as: read a nibble from next byte,
86-
// If nibble==15, drop into the byte(s) extension path.
87-
88-
// We read one “length control” byte that holds one or two 4-bit fields.
89-
if (src >= compressed.Length)
87+
// Extended length: consume a 4-bit nibble that is packed 2-per-byte.
88+
// We must reuse the high nibble on every second extended-length match.
89+
int nibble;
90+
if (cachedLenNibble >= 0)
9091
{
91-
return false;
92+
nibble = cachedLenNibble;
93+
cachedLenNibble = -1;
9294
}
95+
else
96+
{
97+
if (src >= compressed.Length)
98+
{
99+
return false;
100+
}
93101

94-
byte lenCtl = compressed[src++];
95-
int lenNibbleLow = (lenCtl & 0x0F);
96-
97-
// The encoder may pack two 4-bit values across matches;
98-
// to keep decoder robust, consume low nibble first, then high nibble
99-
// on the next long-length in the same flag run.
100-
// Many streams set only one nibble here per long length.
101-
// int lenNibbleHigh = (lenCtl >> 4) & 0x0F;
102-
103-
// The encoder may pack two 4-bit values across matches; to keep decoder robust,
104-
// consume low nibble first, then high nibble on the *next* long-length in the same flag run.
105-
// Many streams set only one nibble here per long length. We handle both cases:
102+
var lenCtl = compressed[src++];
103+
nibble = lenCtl & 0x0F;
104+
cachedLenNibble = (lenCtl >> 4) & 0x0F;
105+
}
106106

107-
int firstNibble = lenNibbleLow;
108-
if (firstNibble != 15)
107+
if (nibble != 15)
109108
{
110-
matchLen = 3 + 7 + firstNibble;
109+
matchLen = 3 + 7 + nibble;
111110
}
112111
else
113112
{
114-
// Need extra bytes:
115-
// Next: 1 byte (0..254) or 255 sentinel → then 2 bytes (or 4 on huge) per spec.
116113
if (src >= compressed.Length)
117114
{
118115
return false;
119116
}
120117

121118
int b = compressed[src++];
119+
122120
if (b != 255)
123121
{
124122
matchLen = 3 + 7 + 15 + b;
@@ -130,12 +128,13 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
130128
return false;
131129
}
132130

133-
ushort len16 = BinaryPrimitives.ReadUInt16LittleEndian(compressed.Slice(src, 2));
131+
var len16 = BinaryPrimitives.ReadUInt16LittleEndian(compressed.Slice(src, 2));
134132
src += 2;
135133

136134
if (len16 != 0)
137135
{
138-
matchLen = len16; // already includes the + (3+7+15) per spec’s encoder logic
136+
// For XPRESS, this is effectively "length minus 3" in many emitters.
137+
matchLen = len16 + 3;
139138
}
140139
else
141140
{
@@ -144,15 +143,12 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
144143
return false;
145144
}
146145

147-
matchLen = BinaryPrimitives.ReadInt32LittleEndian(compressed.Slice(src, 4));
146+
var len32 = BinaryPrimitives.ReadInt32LittleEndian(compressed.Slice(src, 4));
148147
src += 4;
148+
matchLen = len32 + 3;
149149
}
150150
}
151151
}
152-
153-
// Note: If your corpus actually uses the “reused high nibble” packing described in MS-XCA,
154-
// you can enhance this decoder to cache lenNibbleHigh and apply it to the next long length.
155-
// The above version is tolerant and works with standard Windows XPRESS emitters.
156152
}
157153

158154
// Copy match
@@ -167,13 +163,13 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
167163
}
168164

169165
// Bounds check (stream might claim more than remaining output)
170-
int toCopy = matchLen;
166+
var toCopy = matchLen;
171167
if (decompressedSize + toCopy > expectedSize)
172168
{
173169
return false;
174170
}
175171

176-
int srcPos = decompressedSize - matchOffset;
172+
var srcPos = decompressedSize - matchOffset;
177173
// Overlap-safe copy
178174
while (toCopy-- > 0)
179175
{
@@ -193,165 +189,9 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
193189
/// <exception cref="InvalidDataException">On malformed input</exception>
194190
public static void Decompress(ReadOnlySpan<byte> compressed, Span<byte> output)
195191
{
196-
// We implement the MS-XCA “fastest variant / Plain LZ77” decoder.
197-
// Ref: MS-XCA §2.3.4 Processing (literal/match flags in 32-bit chunks; 16-bit match token).
198-
199-
var uncompressedSize = output.Length;
200-
int src = 0, dst = 0;
201-
202-
while (dst < uncompressedSize)
192+
if (!TryDecompress(compressed, output, out var decompressedSize) || decompressedSize != output.Length)
203193
{
204-
if (src + 4 > compressed.Length)
205-
{
206-
throw new InvalidDataException("Unexpected end of input when reading flags.");
207-
}
208-
209-
// Flags are processed MSB → LSB (we write them, then consume from the high bit).
210-
uint flags = BinaryPrimitives.ReadUInt32LittleEndian(compressed.Slice(src, 4));
211-
src += 4;
212-
213-
for (int i = 0; i < 32; i++)
214-
{
215-
if (dst >= uncompressedSize)
216-
{
217-
break; // Done
218-
}
219-
220-
bool isMatch = (flags & 0x8000_0000u) != 0;
221-
flags <<= 1;
222-
223-
if (!isMatch)
224-
{
225-
// Literal
226-
if (src >= compressed.Length)
227-
{
228-
throw new InvalidDataException("Unexpected end of input in literal.");
229-
}
230-
231-
output[dst++] = compressed[src++];
232-
continue;
233-
}
234-
235-
// Match: first 2 bytes are the primary token
236-
if (src + 2 > compressed.Length)
237-
{
238-
throw new InvalidDataException("Unexpected end of input in match token.");
239-
}
240-
241-
ushort token = BinaryPrimitives.ReadUInt16LittleEndian(compressed.Slice(src, 2));
242-
src += 2;
243-
244-
int matchOffset = ((token >> 3) & 0x1FFF) + 1; // 13 bits + bias
245-
int lenMinus3 = (token & 0x7); // 3 low bits
246-
int matchLen;
247-
248-
if (lenMinus3 < 7)
249-
{
250-
matchLen = lenMinus3 + 3;
251-
}
252-
else
253-
{
254-
// Extended length path: the next nibble (4 bits) comes in a packed scheme.
255-
// MS-XCA describes a “half-byte” reuse; we follow the encoder’s layout:
256-
// First we read a single byte that contributes 4 or 8 bits depending on reuse;
257-
// Simpler to implement decoder-side as: read a nibble from next byte,
258-
// If nibble==15, drop into the byte(s) extension path.
259-
260-
// We read one “length control” byte that holds one or two 4-bit fields.
261-
if (src >= compressed.Length)
262-
{
263-
throw new InvalidDataException("Unexpected end of input in length nibble.");
264-
}
265-
266-
byte lenCtl = compressed[src++];
267-
int lenNibbleLow = (lenCtl & 0x0F);
268-
269-
// The encoder may pack two 4-bit values across matches;
270-
// to keep decoder robust, consume low nibble first, then high nibble
271-
// on the next long-length in the same flag run.
272-
// Many streams set only one nibble here per long length.
273-
// int lenNibbleHigh = (lenCtl >> 4) & 0x0F;
274-
275-
// The encoder may pack two 4-bit values across matches; to keep decoder robust,
276-
// consume low nibble first, then high nibble on the *next* long-length in the same flag run.
277-
// Many streams set only one nibble here per long length. We handle both cases:
278-
279-
int firstNibble = lenNibbleLow;
280-
if (firstNibble != 15)
281-
{
282-
matchLen = 3 + 7 + firstNibble;
283-
}
284-
else
285-
{
286-
// Need extra bytes:
287-
// Next: 1 byte (0..254) or 255 sentinel → then 2 bytes (or 4 on huge) per spec.
288-
if (src >= compressed.Length)
289-
{
290-
throw new InvalidDataException("Unexpected end of input in length ext.");
291-
}
292-
293-
int b = compressed[src++];
294-
if (b != 255)
295-
{
296-
matchLen = 3 + 7 + 15 + b;
297-
}
298-
else
299-
{
300-
if (src + 2 > compressed.Length)
301-
{
302-
throw new InvalidDataException("Unexpected end of input in length 16-bit.");
303-
}
304-
305-
ushort len16 = BinaryPrimitives.ReadUInt16LittleEndian(compressed.Slice(src, 2));
306-
src += 2;
307-
308-
if (len16 != 0)
309-
{
310-
matchLen = len16; // already includes the + (3+7+15) per spec’s encoder logic
311-
}
312-
else
313-
{
314-
if (src + 4 > compressed.Length)
315-
{
316-
throw new InvalidDataException("Unexpected end of input in length 32-bit.");
317-
}
318-
319-
matchLen = BinaryPrimitives.ReadInt32LittleEndian(compressed.Slice(src, 4));
320-
src += 4;
321-
}
322-
}
323-
}
324-
325-
// Note: If your corpus actually uses the “reused high nibble” packing described in MS-XCA,
326-
// you can enhance this decoder to cache lenNibbleHigh and apply it to the next long length.
327-
// The above version is tolerant and works with standard Windows XPRESS emitters.
328-
}
329-
330-
// Copy match
331-
if (matchOffset <= 0 || matchOffset > 8192)
332-
{
333-
throw new InvalidDataException($"Invalid match offset {matchOffset}.");
334-
}
335-
336-
if (dst < matchOffset)
337-
{
338-
throw new InvalidDataException("Match points before start of output.");
339-
}
340-
341-
// Bounds check (stream might claim more than remaining output)
342-
int toCopy = matchLen;
343-
if (dst + toCopy > uncompressedSize)
344-
{
345-
throw new InvalidDataException("Match overruns output buffer.");
346-
}
347-
348-
int srcPos = dst - matchOffset;
349-
// Overlap-safe copy
350-
while (toCopy-- > 0)
351-
{
352-
output[dst++] = output[srcPos++];
353-
}
354-
}
194+
throw new InvalidDataException("Malformed XPRESS compressed data");
355195
}
356196
}
357197
}

Library/DiscUtils.Core/Compression/XpressStream.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ private byte[] Buffer(int count)
187187
len += 3;
188188

189189
// Simply do the copy
190-
for (var i = 0; i < len; ++i)
190+
for (var i = 0; i < len && numRead < count; ++i)
191191
{
192192
buffer[numRead] = buffer[numRead - offset - 1];
193193
numRead++;

0 commit comments

Comments
 (0)