@@ -30,7 +30,7 @@ const int HuffmanDecoder::TABLE_MASK = (1 << DECODING_BATCH_SIZE) - 1;
3030
3131// The chunk size indicates how many bytes are encoded (per block) before
3232// resetting the frequency stats.
33- HuffmanDecoder::HuffmanDecoder (InputBitStream& bitstream, int chunkSize) : _bitstream(bitstream)
33+ HuffmanDecoder::HuffmanDecoder (InputBitStream& bitstream, Context* pCtx, int chunkSize) : _bitstream(bitstream)
3434{
3535 if (chunkSize < 1024 )
3636 throw invalid_argument (" Huffman codec: The chunk size must be at least 1024" );
@@ -44,6 +44,7 @@ HuffmanDecoder::HuffmanDecoder(InputBitStream& bitstream, int chunkSize) : _bits
4444 _chunkSize = chunkSize;
4545 _buffer = new byte[0 ];
4646 _bufferSize = 0 ;
47+ _pCtx = pCtx;
4748 reset ();
4849}
4950
@@ -108,7 +109,7 @@ int HuffmanDecoder::readLengths()
108109// max(CodeLen) must be <= MAX_SYMBOL_SIZE
109110bool HuffmanDecoder::buildDecodingTable (int count)
110111{
111- // Initialize table with non zero value .
112+ // Initialize table with non zero values .
112113 // If the bitstream is altered, the decoder may access these default table values.
113114 // The number of consumed bits cannot be 0.
114115 memset (_table, 8 , sizeof (_table));
@@ -138,6 +139,187 @@ bool HuffmanDecoder::buildDecodingTable(int count)
138139}
139140
140141int HuffmanDecoder::decode (byte block[], uint blkptr, uint count)
142+ {
143+ int bsVersion = _pCtx == nullptr ? 6 : _pCtx->getInt (" bsVersion" , 6 );
144+
145+ if (bsVersion < 6 )
146+ return decodeV5 (block, blkptr, count);
147+
148+ return decodeV6 (block, blkptr, count);
149+ }
150+
151+
152+ int HuffmanDecoder::decodeV6 (byte block[], uint blkptr, uint count)
153+ {
154+ if (count == 0 )
155+ return 0 ;
156+
157+ const uint minBufSize = 2 * uint (_chunkSize);
158+
159+ if (_bufferSize < minBufSize) {
160+ delete[] _buffer;
161+ _bufferSize = minBufSize;
162+ _buffer = new byte[_bufferSize];
163+ }
164+
165+ uint startChunk = blkptr;
166+ const uint end = blkptr + count;
167+
168+ while (startChunk < end) {
169+ const uint sizeChunk = min (uint (_chunkSize), end - startChunk);
170+
171+ if (sizeChunk < 32 ) {
172+ // Special case for small chunks
173+ _bitstream.readBits (&block[startChunk], 8 * sizeChunk);
174+ }
175+ else {
176+ // For each chunk, read code lengths, rebuild codes, rebuild decoding table
177+ const int alphabetSize = readLengths ();
178+
179+ if (alphabetSize <= 0 )
180+ return startChunk - blkptr;
181+
182+ if (alphabetSize == 1 ) {
183+ // Shortcut for chunks with only one symbol
184+ memset (&block[startChunk], _alphabet[0 ], size_t (sizeChunk));
185+ }
186+ else {
187+ if (buildDecodingTable (alphabetSize) == false )
188+ return -1 ;
189+
190+ if (decodeChunk (&block[startChunk], sizeChunk) == false )
191+ break ;
192+ }
193+ }
194+
195+ startChunk += sizeChunk;
196+ }
197+
198+ return count;
199+ }
200+
201+ // count is at least 32
202+ bool HuffmanDecoder::decodeChunk (byte block[], uint count)
203+ {
204+ // Read fragment sizes
205+ const int szBits0 = EntropyUtils::readVarInt (_bitstream);
206+ const int szBits1 = EntropyUtils::readVarInt (_bitstream);
207+ const int szBits2 = EntropyUtils::readVarInt (_bitstream);
208+ const int szBits3 = EntropyUtils::readVarInt (_bitstream);
209+
210+ if ((szBits0 < 0 ) || (szBits1 < 0 ) || (szBits2 < 0 ) || (szBits3 < 0 ))
211+ return false ;
212+
213+ memset (_buffer, 0 , _bufferSize);
214+
215+ int idx0 = 0 * (_bufferSize / 4 );
216+ int idx1 = 1 * (_bufferSize / 4 );
217+ int idx2 = 2 * (_bufferSize / 4 );
218+ int idx3 = 3 * (_bufferSize / 4 );
219+
220+ // Read all compressed data from bitstream
221+ _bitstream.readBits (&_buffer[idx0], szBits0);
222+ _bitstream.readBits (&_buffer[idx1], szBits1);
223+ _bitstream.readBits (&_buffer[idx2], szBits2);
224+ _bitstream.readBits (&_buffer[idx3], szBits3);
225+
226+ // State variables for each of the four parallel streams
227+ uint64 state0 = 0 , state1 = 0 , state2 = 0 , state3 = 0 ; // bits read from bitstream
228+ uint8 bits0 = 0 , bits1 = 0 , bits2 = 0 , bits3 = 0 ; // number of available bits in state
229+ uint8 bs0, bs1, bs2, bs3, shift;
230+
231+ #define READ_STATE (shift, state, idx, bits, bs ) \
232+ shift = (56 - bits) & -8 ; \
233+ bs = bits + shift - DECODING_BATCH_SIZE; \
234+ state = (state << shift) | (uint64 (BigEndian::readLong64 (&_buffer[idx])) >> 1 >> (63 - shift)); /* handle shift = 0 */ \
235+ idx += (shift >> 3 );
236+
237+ const int szFrag = count / 4 ;
238+ byte* block0 = &block[0 * szFrag];
239+ byte* block1 = &block[1 * szFrag];
240+ byte* block2 = &block[2 * szFrag];
241+ byte* block3 = &block[3 * szFrag];
242+ int n = 0 ;
243+
244+ while (n < szFrag - 4 ) {
245+ // Fill 64 bits of state from the bitstream for each stream
246+ READ_STATE (shift, state0, idx0, bits0, bs0);
247+ READ_STATE (shift, state1, idx1, bits1, bs1);
248+ READ_STATE (shift, state2, idx2, bits2, bs2);
249+ READ_STATE (shift, state3, idx3, bits3, bs3);
250+
251+ // Decompress 4 symbols per stream
252+ const uint16 val00 = _table[(state0 >> bs0) & TABLE_MASK]; bs0 -= uint8 (val00);
253+ const uint16 val10 = _table[(state1 >> bs1) & TABLE_MASK]; bs1 -= uint8 (val10);
254+ const uint16 val20 = _table[(state2 >> bs2) & TABLE_MASK]; bs2 -= uint8 (val20);
255+ const uint16 val30 = _table[(state3 >> bs3) & TABLE_MASK]; bs3 -= uint8 (val30);
256+ const uint16 val01 = _table[(state0 >> bs0) & TABLE_MASK]; bs0 -= uint8 (val01);
257+ const uint16 val11 = _table[(state1 >> bs1) & TABLE_MASK]; bs1 -= uint8 (val11);
258+ const uint16 val21 = _table[(state2 >> bs2) & TABLE_MASK]; bs2 -= uint8 (val21);
259+ const uint16 val31 = _table[(state3 >> bs3) & TABLE_MASK]; bs3 -= uint8 (val31);
260+ const uint16 val02 = _table[(state0 >> bs0) & TABLE_MASK]; bs0 -= uint8 (val02);
261+ const uint16 val12 = _table[(state1 >> bs1) & TABLE_MASK]; bs1 -= uint8 (val12);
262+ const uint16 val22 = _table[(state2 >> bs2) & TABLE_MASK]; bs2 -= uint8 (val22);
263+ const uint16 val32 = _table[(state3 >> bs3) & TABLE_MASK]; bs3 -= uint8 (val32);
264+ const uint16 val03 = _table[(state0 >> bs0) & TABLE_MASK]; bs0 -= uint8 (val03);
265+ const uint16 val13 = _table[(state1 >> bs1) & TABLE_MASK]; bs1 -= uint8 (val13);
266+ const uint16 val23 = _table[(state2 >> bs2) & TABLE_MASK]; bs2 -= uint8 (val23);
267+ const uint16 val33 = _table[(state3 >> bs3) & TABLE_MASK]; bs3 -= uint8 (val33);
268+
269+ bits0 = bs0 + DECODING_BATCH_SIZE;
270+ bits1 = bs1 + DECODING_BATCH_SIZE;
271+ bits2 = bs2 + DECODING_BATCH_SIZE;
272+ bits3 = bs3 + DECODING_BATCH_SIZE;
273+
274+ block0[n + 0 ] = byte (val00 >> 8 );
275+ block1[n + 0 ] = byte (val10 >> 8 );
276+ block2[n + 0 ] = byte (val20 >> 8 );
277+ block3[n + 0 ] = byte (val30 >> 8 );
278+ block0[n + 1 ] = byte (val01 >> 8 );
279+ block1[n + 1 ] = byte (val11 >> 8 );
280+ block2[n + 1 ] = byte (val21 >> 8 );
281+ block3[n + 1 ] = byte (val31 >> 8 );
282+ block0[n + 2 ] = byte (val02 >> 8 );
283+ block1[n + 2 ] = byte (val12 >> 8 );
284+ block2[n + 2 ] = byte (val22 >> 8 );
285+ block3[n + 2 ] = byte (val32 >> 8 );
286+ block0[n + 3 ] = byte (val03 >> 8 );
287+ block1[n + 3 ] = byte (val13 >> 8 );
288+ block2[n + 3 ] = byte (val23 >> 8 );
289+ block3[n + 3 ] = byte (val33 >> 8 );
290+ n += 4 ;
291+ }
292+
293+ // Fill 64 bits of state from the bitstream for each stream
294+ READ_STATE (shift, state0, idx0, bits0, bs0);
295+ READ_STATE (shift, state1, idx1, bits1, bs1);
296+ READ_STATE (shift, state2, idx2, bits2, bs2);
297+ READ_STATE (shift, state3, idx3, bits3, bs3);
298+
299+ while (n < szFrag) {
300+ // Decompress 1 symbol per stream
301+ const uint16 val0 = _table[(state0 >> bs0) & TABLE_MASK]; bs0 -= uint8 (val0);
302+ const uint16 val1 = _table[(state1 >> bs1) & TABLE_MASK]; bs1 -= uint8 (val1);
303+ const uint16 val2 = _table[(state2 >> bs2) & TABLE_MASK]; bs2 -= uint8 (val2);
304+ const uint16 val3 = _table[(state3 >> bs3) & TABLE_MASK]; bs3 -= uint8 (val3);
305+
306+ block0[n] = byte (val0 >> 8 );
307+ block1[n] = byte (val1 >> 8 );
308+ block2[n] = byte (val2 >> 8 );
309+ block3[n] = byte (val3 >> 8 );
310+ n++;
311+ }
312+
313+ // Process any remaining bytes at the end of the whole chunk
314+ const uint count4 = 4 * szFrag;
315+
316+ for (uint i = count4; i < count; i++)
317+ block[i] = byte (_bitstream.readBits (8 ));
318+
319+ return true ;
320+ }
321+
322+ int HuffmanDecoder::decodeV5 (byte block[], uint blkptr, uint count)
141323{
142324 if (count == 0 )
143325 return 0 ;
@@ -230,7 +412,7 @@ int HuffmanDecoder::decode(byte block[], uint blkptr, uint count)
230412
231413 // Sanity check
232414 if (bits > 64 )
233- return n;
415+ return n;
234416
235417 uint16 val;
236418
@@ -249,4 +431,3 @@ int HuffmanDecoder::decode(byte block[], uint blkptr, uint count)
249431
250432 return count;
251433}
252-
0 commit comments