11using System ;
22using System . Buffers . Binary ;
3+ using System . Diagnostics ;
34using System . IO ;
45
56namespace DiscUtils . Compression ;
67
78public sealed class XpressLz77 : IBlockDecompressor
89{
10+ public static XpressLz77 Default { get ; } = new ( ) ;
11+
912 int IBlockDecompressor . BlockSize { get ; set ; }
1013
1114 bool IBlockDecompressor . TryDecompress ( ReadOnlySpan < byte > compressed , Span < byte > output , out int decompressedSize )
@@ -24,8 +27,9 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
2427 // Ref: MS-XCA §2.3.4 Processing (literal/match flags in 32-bit chunks; 16-bit match token).
2528
2629 var expectedSize = output . Length ;
27- int src = 0 ;
30+ var src = 0 ;
2831 decompressedSize = 0 ;
32+ var cachedLenNibble = - 1 ; // -1 = empty; otherwise 0..15
2933
3034 while ( decompressedSize < expectedSize )
3135 {
@@ -35,17 +39,17 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
3539 }
3640
3741 // Flags are processed MSB → LSB (we write them, then consume from the high bit).
38- uint flags = BinaryPrimitives . ReadUInt32LittleEndian ( compressed . Slice ( src , 4 ) ) ;
42+ var flags = BinaryPrimitives . ReadUInt32LittleEndian ( compressed . Slice ( src , 4 ) ) ;
3943 src += 4 ;
4044
41- for ( int i = 0 ; i < 32 ; i ++ )
45+ for ( var i = 0 ; i < 32 ; i ++ )
4246 {
4347 if ( decompressedSize >= expectedSize )
4448 {
4549 break ; // Done
4650 }
4751
48- bool isMatch = ( flags & 0x8000_0000u ) != 0 ;
52+ var isMatch = ( flags & 0x8000_0000u ) != 0 ;
4953 flags <<= 1 ;
5054
5155 if ( ! isMatch )
@@ -57,6 +61,7 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
5761 }
5862
5963 output [ decompressedSize ++ ] = compressed [ src ++ ] ;
64+
6065 continue ;
6166 }
6267
@@ -66,11 +71,11 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
6671 return false ;
6772 }
6873
69- ushort token = BinaryPrimitives . ReadUInt16LittleEndian ( compressed . Slice ( src , 2 ) ) ;
74+ var token = BinaryPrimitives . ReadUInt16LittleEndian ( compressed . Slice ( src , 2 ) ) ;
7075 src += 2 ;
7176
72- int matchOffset = ( ( token >> 3 ) & 0x1FFF ) + 1 ; // 13 bits + bias
73- int lenMinus3 = ( token & 0x7 ) ; // 3 low bits
77+ var matchOffset = ( ( token >> 3 ) & 0x1FFF ) + 1 ; // 13 bits + bias
78+ var lenMinus3 = ( token & 0x7 ) ; // 3 low bits
7479 int matchLen ;
7580
7681 if ( lenMinus3 < 7 )
@@ -79,46 +84,39 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
7984 }
8085 else
8186 {
82- // Extended length path: the next nibble (4 bits) comes in a packed scheme.
83- // MS-XCA describes a “half-byte” reuse; we follow the encoder’s layout:
84- // First we read a single byte that contributes 4 or 8 bits depending on reuse;
85- // Simpler to implement decoder-side as: read a nibble from next byte,
86- // If nibble==15, drop into the byte(s) extension path.
87-
88- // We read one “length control” byte that holds one or two 4-bit fields.
89- if ( src >= compressed . Length )
87+ // Extended length: consume a 4-bit nibble that is packed 2-per-byte.
88+ // We must reuse the high nibble on every second extended-length match.
89+ int nibble ;
90+ if ( cachedLenNibble >= 0 )
9091 {
91- return false ;
92+ nibble = cachedLenNibble ;
93+ cachedLenNibble = - 1 ;
9294 }
95+ else
96+ {
97+ if ( src >= compressed . Length )
98+ {
99+ return false ;
100+ }
93101
94- byte lenCtl = compressed [ src ++ ] ;
95- int lenNibbleLow = ( lenCtl & 0x0F ) ;
96-
97- // The encoder may pack two 4-bit values across matches;
98- // to keep decoder robust, consume low nibble first, then high nibble
99- // on the next long-length in the same flag run.
100- // Many streams set only one nibble here per long length.
101- // int lenNibbleHigh = (lenCtl >> 4) & 0x0F;
102-
103- // The encoder may pack two 4-bit values across matches; to keep decoder robust,
104- // consume low nibble first, then high nibble on the *next* long-length in the same flag run.
105- // Many streams set only one nibble here per long length. We handle both cases:
102+ var lenCtl = compressed [ src ++ ] ;
103+ nibble = lenCtl & 0x0F ;
104+ cachedLenNibble = ( lenCtl >> 4 ) & 0x0F ;
105+ }
106106
107- int firstNibble = lenNibbleLow ;
108- if ( firstNibble != 15 )
107+ if ( nibble != 15 )
109108 {
110- matchLen = 3 + 7 + firstNibble ;
109+ matchLen = 3 + 7 + nibble ;
111110 }
112111 else
113112 {
114- // Need extra bytes:
115- // Next: 1 byte (0..254) or 255 sentinel → then 2 bytes (or 4 on huge) per spec.
116113 if ( src >= compressed . Length )
117114 {
118115 return false ;
119116 }
120117
121118 int b = compressed [ src ++ ] ;
119+
122120 if ( b != 255 )
123121 {
124122 matchLen = 3 + 7 + 15 + b ;
@@ -130,12 +128,13 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
130128 return false ;
131129 }
132130
133- ushort len16 = BinaryPrimitives . ReadUInt16LittleEndian ( compressed . Slice ( src , 2 ) ) ;
131+ var len16 = BinaryPrimitives . ReadUInt16LittleEndian ( compressed . Slice ( src , 2 ) ) ;
134132 src += 2 ;
135133
136134 if ( len16 != 0 )
137135 {
138- matchLen = len16 ; // already includes the + (3+7+15) per spec’s encoder logic
136+ // Per the MS-XCA plain LZ77 decoding procedure, this 16-bit field stores the full match length minus 3.
137+ matchLen = len16 + 3 ;
139138 }
140139 else
141140 {
@@ -144,15 +143,12 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
144143 return false ;
145144 }
146145
147- matchLen = BinaryPrimitives . ReadInt32LittleEndian ( compressed . Slice ( src , 4 ) ) ;
146+ var len32 = BinaryPrimitives . ReadInt32LittleEndian ( compressed . Slice ( src , 4 ) ) ;
148147 src += 4 ;
148+ matchLen = len32 + 3 ;
149149 }
150150 }
151151 }
152-
153- // Note: If your corpus actually uses the “reused high nibble” packing described in MS-XCA,
154- // you can enhance this decoder to cache lenNibbleHigh and apply it to the next long length.
155- // The above version is tolerant and works with standard Windows XPRESS emitters. :contentReference[oaicite:4]{index=4}
156152 }
157153
158154 // Copy match
@@ -167,13 +163,13 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
167163 }
168164
169165 // Bounds check (stream might claim more than remaining output)
170- int toCopy = matchLen ;
166+ var toCopy = matchLen ;
171167 if ( decompressedSize + toCopy > expectedSize )
172168 {
173169 return false ;
174170 }
175171
176- int srcPos = decompressedSize - matchOffset ;
172+ var srcPos = decompressedSize - matchOffset ;
177173 // Overlap-safe copy
178174 while ( toCopy -- > 0 )
179175 {
@@ -193,165 +189,9 @@ public static bool TryDecompress(ReadOnlySpan<byte> compressed, Span<byte> outpu
193189 /// <exception cref="InvalidDataException">On malformed input</exception>
194190 public static void Decompress ( ReadOnlySpan < byte > compressed , Span < byte > output )
195191 {
196- // We implement the MS-XCA “fastest variant / Plain LZ77” decoder.
197- // Ref: MS-XCA §2.3.4 Processing (literal/match flags in 32-bit chunks; 16-bit match token). :contentReference[oaicite:3]{index=3}
198-
199- var uncompressedSize = output . Length ;
200- int src = 0 , dst = 0 ;
201-
202- while ( dst < uncompressedSize )
192+ if ( ! TryDecompress ( compressed , output , out var decompressedSize ) || decompressedSize != output . Length )
203193 {
204- if ( src + 4 > compressed . Length )
205- {
206- throw new InvalidDataException ( "Unexpected end of input when reading flags." ) ;
207- }
208-
209- // Flags are processed MSB → LSB (we write them, then consume from the high bit).
210- uint flags = BinaryPrimitives . ReadUInt32LittleEndian ( compressed . Slice ( src , 4 ) ) ;
211- src += 4 ;
212-
213- for ( int i = 0 ; i < 32 ; i ++ )
214- {
215- if ( dst >= uncompressedSize )
216- {
217- break ; // Done
218- }
219-
220- bool isMatch = ( flags & 0x8000_0000u ) != 0 ;
221- flags <<= 1 ;
222-
223- if ( ! isMatch )
224- {
225- // Literal
226- if ( src >= compressed . Length )
227- {
228- throw new InvalidDataException ( "Unexpected end of input in literal." ) ;
229- }
230-
231- output [ dst ++ ] = compressed [ src ++ ] ;
232- continue ;
233- }
234-
235- // Match: first 2 bytes are the primary token
236- if ( src + 2 > compressed . Length )
237- {
238- throw new InvalidDataException ( "Unexpected end of input in match token." ) ;
239- }
240-
241- ushort token = BinaryPrimitives . ReadUInt16LittleEndian ( compressed . Slice ( src , 2 ) ) ;
242- src += 2 ;
243-
244- int matchOffset = ( ( token >> 3 ) & 0x1FFF ) + 1 ; // 13 bits + bias
245- int lenMinus3 = ( token & 0x7 ) ; // 3 low bits
246- int matchLen ;
247-
248- if ( lenMinus3 < 7 )
249- {
250- matchLen = lenMinus3 + 3 ;
251- }
252- else
253- {
254- // Extended length path: the next nibble (4 bits) comes in a packed scheme.
255- // MS-XCA describes a “half-byte” reuse; we follow the encoder’s layout:
256- // First we read a single byte that contributes 4 or 8 bits depending on reuse;
257- // Simpler to implement decoder-side as: read a nibble from next byte,
258- // If nibble==15, drop into the byte(s) extension path.
259-
260- // We read one “length control” byte that holds one or two 4-bit fields.
261- if ( src >= compressed . Length )
262- {
263- throw new InvalidDataException ( "Unexpected end of input in length nibble." ) ;
264- }
265-
266- byte lenCtl = compressed [ src ++ ] ;
267- int lenNibbleLow = ( lenCtl & 0x0F ) ;
268-
269- // The encoder may pack two 4-bit values across matches;
270- // to keep decoder robust, consume low nibble first, then high nibble
271- // on the next long-length in the same flag run.
272- // Many streams set only one nibble here per long length.
273- // int lenNibbleHigh = (lenCtl >> 4) & 0x0F;
274-
275- // The encoder may pack two 4-bit values across matches; to keep decoder robust,
276- // consume low nibble first, then high nibble on the *next* long-length in the same flag run.
277- // Many streams set only one nibble here per long length. We handle both cases:
278-
279- int firstNibble = lenNibbleLow ;
280- if ( firstNibble != 15 )
281- {
282- matchLen = 3 + 7 + firstNibble ;
283- }
284- else
285- {
286- // Need extra bytes:
287- // Next: 1 byte (0..254) or 255 sentinel → then 2 bytes (or 4 on huge) per spec.
288- if ( src >= compressed . Length )
289- {
290- throw new InvalidDataException ( "Unexpected end of input in length ext." ) ;
291- }
292-
293- int b = compressed [ src ++ ] ;
294- if ( b != 255 )
295- {
296- matchLen = 3 + 7 + 15 + b ;
297- }
298- else
299- {
300- if ( src + 2 > compressed . Length )
301- {
302- throw new InvalidDataException ( "Unexpected end of input in length 16-bit." ) ;
303- }
304-
305- ushort len16 = BinaryPrimitives . ReadUInt16LittleEndian ( compressed . Slice ( src , 2 ) ) ;
306- src += 2 ;
307-
308- if ( len16 != 0 )
309- {
310- matchLen = len16 ; // already includes the + (3+7+15) per spec’s encoder logic
311- }
312- else
313- {
314- if ( src + 4 > compressed . Length )
315- {
316- throw new InvalidDataException ( "Unexpected end of input in length 32-bit." ) ;
317- }
318-
319- matchLen = BinaryPrimitives . ReadInt32LittleEndian ( compressed . Slice ( src , 4 ) ) ;
320- src += 4 ;
321- }
322- }
323- }
324-
325- // Note: If your corpus actually uses the “reused high nibble” packing described in MS-XCA,
326- // you can enhance this decoder to cache lenNibbleHigh and apply it to the next long length.
327- // The above version is tolerant and works with standard Windows XPRESS emitters. :contentReference[oaicite:4]{index=4}
328- }
329-
330- // Copy match
331- if ( matchOffset <= 0 || matchOffset > 8192 )
332- {
333- throw new InvalidDataException ( $ "Invalid match offset { matchOffset } .") ;
334- }
335-
336- if ( dst < matchOffset )
337- {
338- throw new InvalidDataException ( "Match points before start of output." ) ;
339- }
340-
341- // Bounds check (stream might claim more than remaining output)
342- int toCopy = matchLen ;
343- if ( dst + toCopy > uncompressedSize )
344- {
345- throw new InvalidDataException ( "Match overruns output buffer." ) ;
346- }
347-
348- int srcPos = dst - matchOffset ;
349- // Overlap-safe copy
350- while ( toCopy -- > 0 )
351- {
352- output [ dst ++ ] = output [ srcPos ++ ] ;
353- }
354- }
194+ throw new InvalidDataException ( "Malformed XPRESS compressed data" ) ;
355195 }
356196 }
357197}
0 commit comments