Skip to content

Commit 251492b

Browse files
committed
Add zlib decompressor
1 parent eaebb86 commit 251492b

File tree

8 files changed

+1090
-1
lines changed

8 files changed

+1090
-1
lines changed
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.airlift.compress.zlib;
15+
16+
import io.airlift.compress.Decompressor;
17+
import io.airlift.compress.MalformedInputException;
18+
19+
import java.nio.Buffer;
20+
import java.nio.ByteBuffer;
21+
22+
import static io.airlift.compress.zlib.UnsafeUtil.getAddress;
23+
import static java.lang.String.format;
24+
import static java.util.Objects.requireNonNull;
25+
import static sun.misc.Unsafe.ARRAY_BYTE_BASE_OFFSET;
26+
27+
public class InflateDecompressor
28+
implements Decompressor
29+
{
30+
@Override
31+
public int decompress(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, int maxOutputLength)
32+
throws MalformedInputException
33+
{
34+
verifyRange(input, inputOffset, inputLength);
35+
verifyRange(output, outputOffset, maxOutputLength);
36+
37+
long inputAddress = ARRAY_BYTE_BASE_OFFSET + inputOffset;
38+
long inputLimit = inputAddress + inputLength;
39+
long outputAddress = ARRAY_BYTE_BASE_OFFSET + outputOffset;
40+
long outputLimit = outputAddress + maxOutputLength;
41+
42+
return InflateRawDecompressor.decompress(input, inputAddress, inputLimit, output, outputAddress, outputLimit);
43+
}
44+
45+
@Override
46+
public void decompress(ByteBuffer inputBuffer, ByteBuffer outputBuffer)
47+
throws MalformedInputException
48+
{
49+
// Java 9+ added an overload of various methods in ByteBuffer. When compiling with Java 11+ and targeting Java 8 bytecode
50+
// the resulting signatures are invalid for JDK 8, so accesses below result in NoSuchMethodError. Accessing the
51+
// methods through the interface class works around the problem
52+
// Sidenote: we can't target "javac --release 8" because Unsafe is not available in the signature data for that profile
53+
Buffer input = inputBuffer;
54+
Buffer output = outputBuffer;
55+
56+
Object inputBase;
57+
long inputAddress;
58+
long inputLimit;
59+
if (input.isDirect()) {
60+
inputBase = null;
61+
long address = getAddress(input);
62+
inputAddress = address + input.position();
63+
inputLimit = address + input.limit();
64+
}
65+
else if (input.hasArray()) {
66+
inputBase = input.array();
67+
inputAddress = ARRAY_BYTE_BASE_OFFSET + input.arrayOffset() + input.position();
68+
inputLimit = ARRAY_BYTE_BASE_OFFSET + input.arrayOffset() + input.limit();
69+
}
70+
else {
71+
throw new IllegalArgumentException("Unsupported input ByteBuffer implementation " + input.getClass().getName());
72+
}
73+
74+
Object outputBase;
75+
long outputAddress;
76+
long outputLimit;
77+
if (output.isDirect()) {
78+
outputBase = null;
79+
long address = getAddress(output);
80+
outputAddress = address + output.position();
81+
outputLimit = address + output.limit();
82+
}
83+
else if (output.hasArray()) {
84+
outputBase = output.array();
85+
outputAddress = ARRAY_BYTE_BASE_OFFSET + output.arrayOffset() + output.position();
86+
outputLimit = ARRAY_BYTE_BASE_OFFSET + output.arrayOffset() + output.limit();
87+
}
88+
else {
89+
throw new IllegalArgumentException("Unsupported output ByteBuffer implementation " + output.getClass().getName());
90+
}
91+
92+
// HACK: Assure JVM does not collect Slice wrappers while decompressing, since the
93+
// collection may trigger freeing of the underlying memory resulting in a segfault
94+
// There is no other known way to signal to the JVM that an object should not be
95+
// collected in a block, and technically, the JVM is allowed to eliminate these locks.
96+
synchronized (input) {
97+
synchronized (output) {
98+
int written = InflateRawDecompressor.decompress(inputBase, inputAddress, inputLimit, outputBase, outputAddress, outputLimit);
99+
output.position(output.position() + written);
100+
}
101+
}
102+
}
103+
104+
private static void verifyRange(byte[] data, int offset, int length)
105+
{
106+
requireNonNull(data, "data is null");
107+
if (offset < 0 || length < 0 || offset + length > data.length) {
108+
throw new IllegalArgumentException(format("Invalid offset or length (%s, %s) in array of length %s", offset, length, data.length));
109+
}
110+
}
111+
}
Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.airlift.compress.zlib;
15+
16+
import io.airlift.compress.MalformedInputException;
17+
import io.airlift.compress.zlib.InflateTables.CodeType;
18+
import io.airlift.compress.zlib.InflateTables.InflateTable;
19+
20+
import static io.airlift.compress.zlib.InflateTables.END_OF_BLOCK;
21+
import static io.airlift.compress.zlib.InflateTables.ENOUGH_DISTANCES;
22+
import static io.airlift.compress.zlib.InflateTables.ENOUGH_LENGTHS;
23+
import static io.airlift.compress.zlib.InflateTables.INVALID_CODE;
24+
import static io.airlift.compress.zlib.InflateTables.buildCodeTable;
25+
import static io.airlift.compress.zlib.InflateTables.extractBits;
26+
import static io.airlift.compress.zlib.InflateTables.extractOp;
27+
import static io.airlift.compress.zlib.InflateTables.extractValue;
28+
import static java.lang.Math.toIntExact;
29+
30+
// This implementation is based on zlib by Jean-loup Gailly and Mark Adler
31+
public final class InflateRawDecompressor
32+
{
33+
// Values of the 2-bit block type field that decompress() reads at the start of every block;
// each one selects the matching handler (nonCompressed, fixedHuffman, dynamicHuffman).
private static final int NON_COMPRESSED = 0;
34+
private static final int FIXED_HUFFMAN = 1;
35+
private static final int DYNAMIC_HUFFMAN = 2;
36+
37+
// Upper bounds that dynamicHuffman() enforces on the code counts read from the block header;
// together they also size the array holding all code lengths.
private static final int MAX_LENGTH_CODES = 286; // max number of literal/length codes
38+
private static final int MAX_DISTANCE_CODES = 30; // max number of distance codes
39+
40+
// Not instantiable: the class only exposes static methods.
private InflateRawDecompressor() {}
41+
42+
public static int decompress(Object inputBase, long inputAddress, long inputLimit, Object outputBase, long outputAddress, long outputLimit)
43+
throws MalformedInputException
44+
{
45+
InputReader reader = new InputReader(inputBase, inputAddress, inputLimit);
46+
OutputWriter writer = new OutputWriter(outputBase, outputAddress, outputLimit);
47+
48+
boolean last;
49+
do {
50+
last = reader.bits(1) == 1;
51+
int type = reader.bits(2);
52+
53+
switch (type) {
54+
case NON_COMPRESSED:
55+
nonCompressed(reader, writer);
56+
break;
57+
case FIXED_HUFFMAN:
58+
fixedHuffman(reader, writer);
59+
break;
60+
case DYNAMIC_HUFFMAN:
61+
dynamicHuffman(reader, writer);
62+
break;
63+
default:
64+
throw new MalformedInputException(reader.offset(), "Invalid block type: " + type);
65+
}
66+
}
67+
while (!last);
68+
69+
if (reader.available() > 0) {
70+
throw new MalformedInputException(reader.offset(), "Output buffer too small");
71+
}
72+
73+
return toIntExact(writer.offset());
74+
}
75+
76+
private static void nonCompressed(InputReader reader, OutputWriter writer)
77+
{
78+
reader.clear();
79+
80+
int lsb = reader.readByte();
81+
int msb = reader.readByte();
82+
83+
int checkLsb = reader.readByte();
84+
int checkMsb = reader.readByte();
85+
86+
if ((lsb != (~checkLsb & 0xFF)) || (msb != (~checkMsb & 0xFF))) {
87+
throw new MalformedInputException(reader.offset(), "Block length does not match complement");
88+
}
89+
90+
int length = (msb << 8) | lsb;
91+
92+
writer.copyInput(reader, length);
93+
}
94+
95+
private static void fixedHuffman(InputReader reader, OutputWriter writer)
96+
{
97+
inflate(InflateTables.FIXED_TABLE, reader, writer);
98+
}
99+
100+
// Order in which dynamicHuffman() stores the 3-bit values it reads from the block header:
// the i-th value read becomes the code length of symbol CODE_LENGTHS_ORDER[i].
// The sequence is the code length permutation listed in RFC 1951, section 3.2.7.
private static final short[] CODE_LENGTHS_ORDER = {
101+
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
102+
};
103+
104+
private static void dynamicHuffman(InputReader reader, OutputWriter writer)
105+
{
106+
int lengthSize = reader.bits(5) + 257;
107+
int distanceSize = reader.bits(5) + 1;
108+
int codeSize = reader.bits(4) + 4;
109+
if (lengthSize > MAX_LENGTH_CODES) {
110+
throw new MalformedInputException(reader.offset(), "Length count is too large: " + lengthSize);
111+
}
112+
if (distanceSize > MAX_DISTANCE_CODES) {
113+
throw new MalformedInputException(reader.offset(), "Distance count is too large: " + distanceSize);
114+
}
115+
116+
short[] codeLengths = new short[19];
117+
for (int i = 0; i < codeSize; i++) {
118+
codeLengths[CODE_LENGTHS_ORDER[i]] = (short) reader.bits(3);
119+
}
120+
121+
int[] codeCode = new int[388];
122+
int codeBits = buildCodeTable(CodeType.CODES, codeLengths, 0, 19, 7, codeCode);
123+
124+
short[] lengths = new short[MAX_LENGTH_CODES + MAX_DISTANCE_CODES];
125+
126+
int index = 0;
127+
while (index < (lengthSize + distanceSize)) {
128+
int code = codeCode[reader.peek(codeBits)];
129+
reader.skip(extractBits(code));
130+
short value = extractValue(code);
131+
132+
if (value < 16) {
133+
lengths[index] = value;
134+
index++;
135+
continue;
136+
}
137+
138+
short length = 0;
139+
int copy;
140+
if (value == 16) {
141+
if (index == 0) {
142+
throw new MalformedInputException(reader.offset(), "No previous length for repeat");
143+
}
144+
length = lengths[index - 1];
145+
copy = reader.bits(2) + 3;
146+
}
147+
else if (value == 17) {
148+
copy = reader.bits(3) + 3;
149+
}
150+
else {
151+
copy = reader.bits(7) + 11;
152+
}
153+
154+
if ((index + copy) > (lengthSize + distanceSize)) {
155+
throw new MalformedInputException(reader.offset(), "Too many lengths for repeat");
156+
}
157+
158+
while (copy > 0) {
159+
lengths[index] = length;
160+
index++;
161+
copy--;
162+
}
163+
}
164+
165+
if (lengths[256] == 0) {
166+
throw new MalformedInputException(reader.offset(), "Missing end-of-block code");
167+
}
168+
169+
int[] lengthCode = new int[ENOUGH_LENGTHS];
170+
int lengthBits = buildCodeTable(CodeType.LENGTHS, lengths, 0, lengthSize, 9, lengthCode);
171+
172+
int[] distanceCode = new int[ENOUGH_DISTANCES];
173+
int distanceBits = buildCodeTable(CodeType.DISTANCES, lengths, lengthSize, distanceSize, 6, distanceCode);
174+
175+
InflateTable table = new InflateTable(lengthCode, lengthBits, distanceCode, distanceBits);
176+
177+
inflate(table, reader, writer);
178+
}
179+
180+
private static void inflate(InflateTable table, InputReader reader, OutputWriter writer)
181+
{
182+
int tableLengthBits = table.lengthBits;
183+
int tableLengthMask = table.lengthMask;
184+
int[] lengths = table.lengthCode;
185+
186+
int tableDistanceBits = table.distanceBits;
187+
int tableDistanceMask = table.distanceMask;
188+
int[] distances = table.distanceCode;
189+
190+
// decode literals and length/distances until end-of-block
191+
while (true) {
192+
int lengthIndex = reader.peek(tableLengthBits, tableLengthMask);
193+
194+
while (true) {
195+
int packedLength = lengths[lengthIndex];
196+
reader.skip(extractBits(packedLength));
197+
int lengthOp = extractOp(packedLength);
198+
int length = extractValue(packedLength);
199+
200+
if (lengthOp == 0) {
201+
// literal
202+
writer.writeByte(reader, (byte) length);
203+
break;
204+
}
205+
206+
if ((lengthOp & 0b0001_0000) != 0) {
207+
// length base
208+
int lengthBits = lengthOp & 0b1111;
209+
if (lengthBits > 0) {
210+
length += reader.bits(lengthBits);
211+
}
212+
213+
int distanceIndex = reader.peek(tableDistanceBits, tableDistanceMask);
214+
while (true) {
215+
int packedDistance = distances[distanceIndex];
216+
reader.skip(extractBits(packedDistance));
217+
int distanceOp = extractOp(packedDistance);
218+
int distance = extractValue(packedDistance);
219+
220+
if ((distanceOp & 0b0001_0000) != 0) {
221+
// distance base
222+
int distanceBits = distanceOp & 0b1111;
223+
if (distanceBits > 0) {
224+
distance += reader.bits(distanceBits);
225+
}
226+
writer.copyOutput(reader, distance, length);
227+
break;
228+
}
229+
230+
if ((distanceOp & INVALID_CODE) == 0) {
231+
// second level distance code
232+
distanceIndex = distance + reader.peek(distanceOp);
233+
continue;
234+
}
235+
236+
throw new MalformedInputException(reader.offset(), "Invalid distance code");
237+
}
238+
break;
239+
}
240+
241+
if ((lengthOp & INVALID_CODE) == 0) {
242+
// second level length code
243+
lengthIndex = length + reader.peek(lengthOp);
244+
continue;
245+
}
246+
247+
if ((lengthOp & END_OF_BLOCK) != 0) {
248+
// end-of-block
249+
return;
250+
}
251+
252+
throw new MalformedInputException(reader.offset(), "Invalid length/literal code");
253+
}
254+
}
255+
}
256+
}

0 commit comments

Comments
 (0)