Skip to content

Commit 5c874fa

Browse files
committed
[GR-35700] Read GZip header before decompression
PullRequest: graalpython/2080
2 parents 877d9e4 + b1cdeae commit 5c874fa

File tree

3 files changed

+106
-23
lines changed

3 files changed

+106
-23
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/zlib/ZLibCompObject.java

Lines changed: 87 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,22 @@
4343
import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ZlibCompress;
4444
import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ZlibDecompress;
4545
import static com.oracle.graal.python.builtins.modules.zlib.ZLibModuleBuiltins.MAX_WBITS;
46+
import static com.oracle.graal.python.builtins.objects.bytes.BytesUtils.mask;
47+
import static com.oracle.graal.python.runtime.exception.PythonErrorType.ZLibError;
4648

49+
import java.util.zip.CRC32;
4750
import java.util.zip.DataFormatException;
4851
import java.util.zip.Deflater;
4952
import java.util.zip.Inflater;
5053

5154
import com.oracle.graal.python.builtins.objects.bytes.PBytes;
5255
import com.oracle.graal.python.builtins.objects.object.PythonBuiltinObject;
56+
import com.oracle.graal.python.nodes.PRaiseNode;
5357
import com.oracle.graal.python.runtime.NFIZlibSupport;
5458
import com.oracle.graal.python.runtime.object.PythonObjectFactory;
59+
import com.oracle.graal.python.util.PythonUtils;
5560
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
61+
import com.oracle.truffle.api.nodes.Node;
5662
import com.oracle.truffle.api.object.Shape;
5763

5864
public abstract class ZLibCompObject extends PythonBuiltinObject {
@@ -109,6 +115,7 @@ protected static class JavaZlibCompObject extends ZLibCompObject {
109115

110116
private byte[] inputData; // helper for copy operation
111117
private boolean canCopy; // to assist if copying is allowed
118+
private boolean readHeader;
112119

113120
public JavaZlibCompObject(Object cls, Shape instanceShape, Object stream, int level, int wbits, int strategy, byte[] zdict) {
114121
super(cls, instanceShape);
@@ -119,6 +126,7 @@ public JavaZlibCompObject(Object cls, Shape instanceShape, Object stream, int le
119126
this.strategy = strategy;
120127
this.inputData = null;
121128
this.canCopy = true;
129+
this.readHeader = wbits >= 25 && wbits <= 31;
122130
}
123131

124132
public JavaZlibCompObject(Object cls, Shape instanceShape, Object stream, int wbits, byte[] zdict) {
@@ -146,11 +154,17 @@ public void setDeflaterInput(byte[] data) {
146154
}
147155

148156
@TruffleBoundary
149-
public void setInflaterInput(byte[] data) {
157+
public void setInflaterInput(byte[] data, Node node) {
150158
assert stream instanceof Inflater;
159+
byte[] bytes = data;
160+
if (readHeader) {
161+
readHeader = false;
162+
int h = gzipHeader(data, node);
163+
bytes = PythonUtils.arrayCopyOfRange(bytes, h, data.length - h);
164+
}
151165
canCopy = inputData == null;
152-
inputData = data;
153-
((Inflater) stream).setInput(data);
166+
inputData = bytes;
167+
((Inflater) stream).setInput(bytes);
154168
}
155169

156170
@TruffleBoundary
@@ -172,7 +186,7 @@ public ZLibCompObject copyCompressObj(PythonObjectFactory factory) {
172186
}
173187

174188
@TruffleBoundary
175-
public ZLibCompObject copyDecompressObj(PythonObjectFactory factory) {
189+
public ZLibCompObject copyDecompressObj(PythonObjectFactory factory, Node node) {
176190
assert canCopy;
177191
boolean isRAW = wbits < 0;
178192
Inflater inflater = new Inflater(isRAW || wbits > (MAX_WBITS + 9));
@@ -182,7 +196,7 @@ public ZLibCompObject copyDecompressObj(PythonObjectFactory factory) {
182196
ZLibCompObject obj = factory.createJavaZLibCompObject(ZlibDecompress, inflater, wbits, zdict);
183197
if (inputData != null) {
184198
try {
185-
((JavaZlibCompObject) obj).setInflaterInput(inputData);
199+
((JavaZlibCompObject) obj).setInflaterInput(inputData, node);
186200
inflater.setInput(inputData);
187201
int n = inflater.inflate(new byte[ZLibModuleBuiltins.DEF_BUF_SIZE]);
188202
if (!isRAW && n == 0 && inflater.needsDictionary() && zdict.length > 0) {
@@ -197,6 +211,74 @@ public ZLibCompObject copyDecompressObj(PythonObjectFactory factory) {
197211
obj.setUnusedData(getUnusedData());
198212
return obj;
199213
}
214+
215+
public static final int GZIP_MAGIC = 0x8b1f;
216+
private static final int FHCRC = 2; // Header CRC
217+
private static final int FEXTRA = 4; // Extra field
218+
private static final int FNAME = 8; // File name
219+
private static final int FCOMMENT = 16; // File comment
220+
221+
private static int getValue(byte b, CRC32 crc) {
222+
int v = mask(b);
223+
crc.update(v);
224+
return v;
225+
}
226+
227+
private static int readShort(byte[] bytes, int off, CRC32 crc) {
228+
return getValue(bytes[off + 1], crc) << 8 | getValue(bytes[off], crc);
229+
}
230+
231+
// logic is from GZIPInputStream.readHeader()
232+
@TruffleBoundary
233+
private static int gzipHeader(byte[] bytes, Node node) {
234+
CRC32 crc = new CRC32();
235+
int idx = 0;
236+
// Check header magic
237+
if (readShort(bytes, idx, crc) != GZIP_MAGIC) {
238+
throw PRaiseNode.raiseUncached(node, ZLibError, "Not in GZIP format");
239+
}
240+
idx += 2;
241+
// Check compression method
242+
if (getValue(bytes[idx++], crc) != 8) {
243+
throw PRaiseNode.raiseUncached(node, ZLibError, "Unsupported compression method");
244+
}
245+
// Read flags
246+
int flg = getValue(bytes[idx++], crc);
247+
// Skip MTIME, XFL, and OS fields
248+
idx += 6;
249+
int n = 2 + 2 + 6;
250+
// Skip optional extra field
251+
if ((flg & FEXTRA) == FEXTRA) {
252+
int m = getValue(bytes[idx++], crc);
253+
idx += m;
254+
n += m + 2;
255+
}
256+
// Skip optional file name
257+
if ((flg & FNAME) == FNAME) {
258+
do {
259+
n++;
260+
} while (getValue(bytes[idx++], crc) != 0);
261+
}
262+
// Skip optional file comment
263+
if ((flg & FCOMMENT) == FCOMMENT) {
264+
do {
265+
n++;
266+
} while (getValue(bytes[idx++], crc) != 0);
267+
}
268+
// Check optional header CRC
269+
crc.reset();
270+
if ((flg & FHCRC) == FHCRC) {
271+
int v = (int) crc.getValue() & 0xffff;
272+
if (readShort(bytes, idx, crc) != v) {
273+
throw PRaiseNode.raiseUncached(node, ZLibError, "Corrupt GZIP header");
274+
}
275+
idx += 2;
276+
n += 2;
277+
}
278+
crc.reset();
279+
return idx;
280+
}
281+
200282
}
201283

202284
public boolean isInitialized() {

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/zlib/ZlibDecompressBuiltins.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ abstract static class BaseCopyNode extends PNodeWithContext {
151151
public abstract Object execute(ZLibCompObject self, PythonContext ctxt, PythonObjectFactory factory);
152152

153153
@Specialization(guards = "self.isInitialized()")
154-
Object doNative(ZLibCompObject.NativeZlibCompObject self, PythonContext ctxt, PythonObjectFactory factory,
154+
static Object doNative(ZLibCompObject.NativeZlibCompObject self, PythonContext ctxt, PythonObjectFactory factory,
155155
@Cached NativeLibrary.InvokeNativeFunction createCompObject,
156156
@Cached NativeLibrary.InvokeNativeFunction decompressObjCopy,
157157
@Cached NativeLibrary.InvokeNativeFunction deallocateStream,
@@ -173,19 +173,19 @@ Object doNative(ZLibCompObject.NativeZlibCompObject self, PythonContext ctxt, Py
173173

174174
@Specialization(guards = {"self.isInitialized()", "self.canCopy()"})
175175
Object doJava(ZLibCompObject.JavaZlibCompObject self, @SuppressWarnings("unused") PythonContext ctxt, PythonObjectFactory factory) {
176-
return self.copyDecompressObj(factory);
176+
return self.copyDecompressObj(factory, this);
177177
}
178178

179179
@SuppressWarnings("unused")
180180
@Specialization(guards = {"self.isInitialized()", "!self.canCopy()"})
181-
PNone error(ZLibCompObject.JavaZlibCompObject self, PythonContext ctxt, PythonObjectFactory factory,
181+
static PNone error(ZLibCompObject.JavaZlibCompObject self, PythonContext ctxt, PythonObjectFactory factory,
182182
@Cached.Shared("r") @Cached PRaiseNode raise) {
183183
throw raise.raise(NotImplementedError, "JDK based zlib doesn't support copying");
184184
}
185185

186186
@SuppressWarnings("unused")
187187
@Specialization(guards = "!self.isInitialized()")
188-
PNone error(ZLibCompObject self, PythonContext ctxt, PythonObjectFactory factory,
188+
static PNone error(ZLibCompObject self, PythonContext ctxt, PythonObjectFactory factory,
189189
@Cached.Shared("r") @Cached PRaiseNode raise) {
190190
throw raise.raise(ValueError, INCONSISTENT_STREAM_STATE);
191191
}
@@ -291,12 +291,12 @@ PBytes doit(ZLibCompObject.NativeZlibCompObject self,
291291
}
292292

293293
@Specialization(guards = "!self.isInitialized()")
294-
PBytes doeof(ZLibCompObject.NativeZlibCompObject self) {
294+
static PBytes doeof(ZLibCompObject.NativeZlibCompObject self) {
295295
return self.getUnusedData();
296296
}
297297

298298
@Specialization
299-
PBytes doit(ZLibCompObject.JavaZlibCompObject self) {
299+
static PBytes doit(ZLibCompObject.JavaZlibCompObject self) {
300300
return self.getUnusedData();
301301
}
302302
}
@@ -314,12 +314,12 @@ PBytes doit(ZLibCompObject.NativeZlibCompObject self,
314314
}
315315

316316
@Specialization(guards = "!self.isInitialized()")
317-
PBytes doeof(ZLibCompObject.NativeZlibCompObject self) {
317+
static PBytes doeof(ZLibCompObject.NativeZlibCompObject self) {
318318
return self.getUnconsumedTail();
319319
}
320320

321321
@Specialization
322-
PBytes doit(ZLibCompObject.JavaZlibCompObject self) {
322+
static PBytes doit(ZLibCompObject.JavaZlibCompObject self) {
323323
return self.getUnconsumedTail();
324324
}
325325
}
@@ -328,7 +328,7 @@ PBytes doit(ZLibCompObject.JavaZlibCompObject self) {
328328
@GenerateNodeFactory
329329
abstract static class EOFNode extends PythonUnaryBuiltinNode {
330330
@Specialization(guards = "self.isEof() || !self.isInitialized()")
331-
boolean doit(ZLibCompObject.NativeZlibCompObject self) {
331+
static boolean doit(ZLibCompObject.NativeZlibCompObject self) {
332332
return self.isEof();
333333
}
334334

@@ -344,7 +344,7 @@ boolean getit(ZLibCompObject.NativeZlibCompObject self,
344344
}
345345

346346
@Specialization
347-
boolean doit(ZLibCompObject.JavaZlibCompObject self) {
347+
static boolean doit(ZLibCompObject.JavaZlibCompObject self) {
348348
return self.isEof();
349349
}
350350
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/zlib/ZlibNodes.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
import com.oracle.graal.python.util.OverflowException;
8686
import com.oracle.graal.python.util.PythonUtils;
8787
import com.oracle.truffle.api.CompilerDirectives;
88+
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
8889
import com.oracle.truffle.api.dsl.Cached;
8990
import com.oracle.truffle.api.dsl.Cached.Shared;
9091
import com.oracle.truffle.api.dsl.Fallback;
@@ -508,7 +509,7 @@ abstract static class JavaCompressNode extends PNodeWithContext {
508509

509510
public abstract PBytes execute(ZLibCompObject.JavaZlibCompObject self, int mode, PythonObjectFactory factory);
510511

511-
@CompilerDirectives.TruffleBoundary
512+
@TruffleBoundary
512513
@Specialization
513514
PBytes doit(ZLibCompObject.JavaZlibCompObject self, int mode, PythonObjectFactory factory) {
514515
byte[] result = new byte[DEF_BUF_SIZE];
@@ -538,7 +539,7 @@ abstract static class JavaDecompressNode extends PNodeWithContext {
538539

539540
public abstract byte[] execute(ZLibCompObject.JavaZlibCompObject self, Object data, int maxLength, int bufSize, PythonObjectFactory factory);
540541

541-
@CompilerDirectives.TruffleBoundary
542+
@TruffleBoundary
542543
@Specialization
543544
byte[] doit(ZLibCompObject.JavaZlibCompObject self, byte[] bytes, int maxLength, int bufSize, PythonObjectFactory factory,
544545
@Cached PRaiseNode raise,
@@ -549,7 +550,7 @@ byte[] doit(ZLibCompObject.JavaZlibCompObject self, byte[] bytes, int maxLength,
549550
boolean zdictIsSet = false;
550551

551552
Inflater inflater = (Inflater) self.stream;
552-
self.setInflaterInput(bytes);
553+
self.setInflaterInput(bytes, raise);
553554

554555
int bytesWritten = result.length;
555556
ByteArrayOutputStream baos = new ByteArrayOutputStream();
@@ -579,7 +580,7 @@ byte[] doit(ZLibCompObject.JavaZlibCompObject self, byte[] bytes, int maxLength,
579580
return baos.toByteArray();
580581
}
581582

582-
@CompilerDirectives.TruffleBoundary
583+
@TruffleBoundary
583584
private static void saveUnconsumedInput(ZLibCompObject.JavaZlibCompObject self, byte[] data,
584585
byte[] unusedDataBytes, int unconsumedTailLen, PythonObjectFactory factory) {
585586
Inflater inflater = (Inflater) self.stream;
@@ -602,17 +603,17 @@ private static void saveUnconsumedInput(ZLibCompObject.JavaZlibCompObject self,
602603
}
603604
}
604605

605-
@CompilerDirectives.TruffleBoundary
606+
@TruffleBoundary
606607
public static boolean needsInput(Inflater inflater) {
607608
return inflater.needsInput();
608609
}
609610

610-
@CompilerDirectives.TruffleBoundary
611+
@TruffleBoundary
611612
public static int getRemaining(Inflater inflater) {
612613
return inflater.getRemaining();
613614
}
614615

615-
@CompilerDirectives.TruffleBoundary
616+
@TruffleBoundary
616617
public static int getBytesRead(Inflater inflater) {
617618
try {
618619
return PInt.intValueExact(inflater.getBytesRead());
@@ -621,7 +622,7 @@ public static int getBytesRead(Inflater inflater) {
621622
}
622623
}
623624

624-
@CompilerDirectives.TruffleBoundary
625+
@TruffleBoundary
625626
public static boolean isFinished(Inflater inflater) {
626627
return inflater.finished();
627628
}

0 commit comments

Comments
 (0)