|
1 | 1 | /* |
2 | 2 | * Copyright 2012 Google, Inc. All Rights Reserved. |
| 3 | + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. |
3 | 4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | 5 | * |
5 | 6 | * This code is free software; you can redistribute it and/or modify it |
|
24 | 25 | /** |
25 | 26 | * @test |
26 | 27 | * @bug 8056934 |
27 | | - * @summary Check ability to read zip files created by python zipfile |
28 | | - * implementation, which fails to write optional (but recommended) data |
29 | | - * descriptor signatures. Repro scenario is a Java -> Python -> Java round trip: |
30 | | - * - ZipOutputStream creates zip file with DEFLATED entries and data |
31 | | - * descriptors with optional signature "PK0x0708". |
32 | | - * - Python reads those entries, preserving the 0x08 flag byte |
33 | | - * - Python outputs those entries with data descriptors lacking the |
34 | | - * optional signature. |
35 | | - * - ZipInputStream cannot handle the missing signature |
36 | | - * |
| 28 | + * @summary Verify the ability to read zip files whose data descriptor, |
| 29 | + * which follows the entry's file data, is missing the optional signature |
| 30 | + * <p> |
37 | 31 | * No way was found to adapt the technique in this test to produce a |
38 | 32 | * ZIP64 zip file without data descriptors. |
39 | | - * |
40 | | - * @ignore This test has brittle dependencies on an external working python. |
| 33 | + * @run junit DataDescriptorSignatureMissing |
41 | 34 | */ |
42 | 35 |
|
| 36 | + |
| 37 | +import org.junit.jupiter.api.Test; |
| 38 | + |
43 | 39 | import java.io.*; |
| 40 | +import java.nio.ByteBuffer; |
| 41 | +import java.nio.ByteOrder; |
| 42 | +import java.nio.charset.StandardCharsets; |
44 | 43 | import java.util.zip.*; |
45 | 44 |
|
46 | | -public class DataDescriptorSignatureMissing { |
47 | | - void printStream(InputStream is) throws IOException { |
48 | | - Reader r = new InputStreamReader(is); |
49 | | - StringBuilder sb = new StringBuilder(); |
50 | | - char[] buf = new char[1024]; |
51 | | - int n; |
52 | | - while ((n = r.read(buf)) > 0) { |
53 | | - sb.append(buf, 0, n); |
54 | | - } |
55 | | - System.out.print(sb); |
56 | | - } |
| 45 | +import static org.junit.jupiter.api.Assertions.*; |
| 46 | + |
| 47 | +public class DataDescriptorSignatureMissing { |
| 48 | + |
| 49 | + /** |
| 50 | + * Verify that ZipInputStream correctly parses a ZIP with a Data Descriptor without |
| 51 | + * the recommended but optional signature. |
| 52 | + */ |
| 53 | + @Test |
| 54 | + public void shouldParseSignaturelessDescriptor() throws IOException { |
| 55 | + // The ZIP with a signature-less descriptor |
| 56 | + byte[] zip = makeZipWithSignaturelessDescriptor(); |
57 | 57 |
|
58 | | - int entryCount(File zipFile) throws IOException { |
59 | | - try (FileInputStream fis = new FileInputStream(zipFile); |
60 | | - ZipInputStream zis = new ZipInputStream(fis)) { |
61 | | - for (int count = 0;; count++) |
62 | | - if (zis.getNextEntry() == null) |
63 | | - return count; |
| 58 | + // ZipInputStream should read the signature-less data descriptor |
| 59 | + try (ZipInputStream in = new ZipInputStream( |
| 60 | + new ByteArrayInputStream(zip))) { |
| 61 | + ZipEntry first = in.getNextEntry(); |
| 62 | + assertNotNull(first, "Zip file is unexpectedly missing first entry"); |
| 63 | + assertEquals("first", first.getName()); |
| 64 | + assertArrayEquals("first".getBytes(StandardCharsets.UTF_8), in.readAllBytes()); |
| 65 | + |
| 66 | + ZipEntry second = in.getNextEntry(); |
| 67 | + assertNotNull(second, "Zip file is unexpectedly missing second entry"); |
| 68 | + assertEquals("second", second.getName()); |
| 69 | + assertArrayEquals("second".getBytes(StandardCharsets.UTF_8), in.readAllBytes()); |
64 | 70 | } |
| 71 | + |
65 | 72 | } |
66 | 73 |
|
67 | | - void test(String[] args) throws Throwable { |
68 | | - if (! new File("/usr/bin/python").canExecute()) |
69 | | - return; |
70 | | - |
71 | | - // Create a java zip file with DEFLATED entries and data |
72 | | - // descriptors with signatures. |
73 | | - final File in = new File("in.zip"); |
74 | | - final File out = new File("out.zip"); |
75 | | - final int count = 3; |
76 | | - try (FileOutputStream fos = new FileOutputStream(in); |
77 | | - ZipOutputStream zos = new ZipOutputStream(fos)) { |
78 | | - for (int i = 0; i < count; i++) { |
79 | | - ZipEntry ze = new ZipEntry("hello.python" + i); |
80 | | - ze.setMethod(ZipEntry.DEFLATED); |
81 | | - zos.putNextEntry(ze); |
82 | | - zos.write(new byte[10]); |
83 | | - zos.closeEntry(); |
84 | | - } |
| 74 | + /** |
| 75 | + * The 'Data descriptor' record is used to facilitate ZIP streaming. If the size of an |
| 76 | + * entry is unknown at the time the LOC header is written, bit 3 of the General Purpose Bit Flag |
| 77 | + * is set, and the File data is immediately followed by the 'Data descriptor' record. This record |
| 78 | + * then contains the compressed and uncompressed sizes of the entry and also the CRC value. |
| 79 | + * |
| 80 | + * The 'Data descriptor' record is usually preceded by the recommended, but optional |
| 81 | + * signature value 0x08074b50. |
| 82 | + * |
| 83 | + * A ZIP entry in streaming mode has the following structure: |
| 84 | + * |
| 85 | + * ------ Local File Header ------ |
| 86 | + * 000000 signature 0x04034b50 |
| 87 | + * 000004 version 20 |
| 88 | + * 000006 flags 0x0808 # Notice bit 3 is set |
| 89 | + * [..] Omitted for brevity |
| 90 | + * |
| 91 | + * ------ File Data ------ |
| 92 | + * 000035 data 7 bytes |
| 93 | + * |
| 94 | + * ------ Data Descriptor ------ |
| 95 | + * 000042 signature 0x08074b50 |
| 96 | + * 000046 crc 0x3610a686 |
| 97 | + * 000050 csize 7 |
| 98 | + * 000054 size 5 |
| 99 | + * |
| 100 | + * A signature-less data descriptor will look like the following: |
| 101 | + * |
| 102 | + * ------ Data Descriptor ------ |
| 103 | + * 000042 crc 0x3610a686 |
| 104 | + * 000046 csize 7 |
| 105 | + * 000050 size 5 |
| 106 | + * |
| 107 | + * This method produces a ZIP with two entries, where the data descriptor |
| 108 | + * of the first entry is made signature-less. |
| 109 | + */ |
| 110 | + private static byte[] makeZipWithSignaturelessDescriptor() throws IOException { |
| 111 | + // Offset of the signature that begins the first entry's data descriptor |
| 112 | + int sigOffset; |
| 113 | + |
| 114 | + ByteArrayOutputStream out = new ByteArrayOutputStream(); |
| 115 | + try (ZipOutputStream zo = new ZipOutputStream(out)) { |
| 116 | + // Write a first entry |
| 117 | + zo.putNextEntry(new ZipEntry("first")); |
| 118 | + zo.write("first".getBytes(StandardCharsets.UTF_8)); |
| 119 | + // Force the data descriptor to be written out |
| 120 | + zo.closeEntry(); |
| 121 | + // The data descriptor (4-byte signature, crc, csize, size) starts 16 bytes before the current offset |
| 122 | + sigOffset = out.size() - 4 * Integer.BYTES; |
| 123 | + // Add a second entry |
| 124 | + zo.putNextEntry(new ZipEntry("second")); |
| 125 | + zo.write("second".getBytes(StandardCharsets.UTF_8)); |
85 | 126 | } |
86 | 127 |
|
87 | | - // Copy the zip file using python's zipfile module |
88 | | - String[] python_program_lines = { |
89 | | - "import os", |
90 | | - "import zipfile", |
91 | | - "input_zip = zipfile.ZipFile('in.zip', mode='r')", |
92 | | - "output_zip = zipfile.ZipFile('out.zip', mode='w')", |
93 | | - "count08 = 0", |
94 | | - "for input_info in input_zip.infolist():", |
95 | | - " output_info = input_info", |
96 | | - " if output_info.flag_bits & 0x08 == 0x08:", |
97 | | - " count08 += 1", |
98 | | - " output_zip.writestr(output_info, input_zip.read(input_info))", |
99 | | - "output_zip.close()", |
100 | | - "if count08 == 0:", |
101 | | - " raise ValueError('Expected to see entries with 0x08 flag_bits set')", |
102 | | - }; |
103 | | - StringBuilder python_program_builder = new StringBuilder(); |
104 | | - for (String line : python_program_lines) |
105 | | - python_program_builder.append(line).append('\n'); |
106 | | - String python_program = python_program_builder.toString(); |
107 | | - String[] cmdline = { "/usr/bin/python", "-c", python_program }; |
108 | | - ProcessBuilder pb = new ProcessBuilder(cmdline); |
109 | | - pb.redirectErrorStream(true); |
110 | | - Process p = pb.start(); |
111 | | - printStream(p.getInputStream()); |
112 | | - p.waitFor(); |
113 | | - equal(p.exitValue(), 0); |
114 | | - |
115 | | - File pythonZipFile = new File("out.zip"); |
116 | | - check(pythonZipFile.exists()); |
117 | | - |
118 | | - equal(entryCount(in), |
119 | | - entryCount(out)); |
120 | | - |
121 | | - // We expect out to be identical to in, except for the removal of |
122 | | - // the optional data descriptor signatures. |
123 | | - final int SIG_LENGTH = 4; // length of a zip signature - PKxx |
124 | | - equal(in.length(), |
125 | | - out.length() + SIG_LENGTH * count); |
126 | | - |
127 | | - in.delete(); |
128 | | - out.delete(); |
129 | | - } |
| 128 | + // The generated ZIP file with a signed data descriptor |
| 129 | + byte[] sigZip = out.toByteArray(); |
| 130 | + |
| 131 | + // The offset of the CRC immediately following the 4-byte signature |
| 132 | + int crcOffset = sigOffset + Integer.BYTES; |
| 133 | + |
| 134 | + // Create a ZIP file with a signature-less data descriptor for the first entry |
| 135 | + ByteArrayOutputStream sigLess = new ByteArrayOutputStream(); |
| 136 | + sigLess.write(sigZip, 0, sigOffset); |
| 137 | + // Skip the signature |
| 138 | + sigLess.write(sigZip, crcOffset, sigZip.length - crcOffset); |
| 139 | + |
| 140 | + byte[] siglessZip = sigLess.toByteArray(); |
130 | 141 |
|
131 | | - //--------------------- Infrastructure --------------------------- |
132 | | - volatile int passed = 0, failed = 0; |
133 | | - void pass() {passed++;} |
134 | | - void fail() {failed++; Thread.dumpStack();} |
135 | | - void fail(String msg) {System.err.println(msg); fail();} |
136 | | - void unexpected(Throwable t) {failed++; t.printStackTrace();} |
137 | | - void check(boolean cond) {if (cond) pass(); else fail();} |
138 | | - void equal(Object x, Object y) { |
139 | | - if (x == null ? y == null : x.equals(y)) pass(); |
140 | | - else fail(x + " not equal to " + y);} |
141 | | - public static void main(String[] args) throws Throwable { |
142 | | - new DataDescriptorSignatureMissing().instanceMain(args);} |
143 | | - public void instanceMain(String[] args) throws Throwable { |
144 | | - try {test(args);} catch (Throwable t) {unexpected(t);} |
145 | | - System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed); |
146 | | - if (failed > 0) throw new AssertionError("Some tests failed");} |
| 142 | + // Adjust the CEN offset in the END header |
| 143 | + ByteBuffer buffer = ByteBuffer.wrap(siglessZip).order(ByteOrder.LITTLE_ENDIAN); |
| 144 | + // Reduce the CEN offset by 4 bytes, the size of the removed signature |
| 145 | + int cenOff = siglessZip.length - ZipFile.ENDHDR + ZipFile.ENDOFF; |
| 146 | + int realCenOff = buffer.getInt(cenOff) - Integer.BYTES; |
| 147 | + buffer.putInt(cenOff, realCenOff); |
| 148 | + |
| 149 | + // Adjust the LOC offset in the second CEN header |
| 150 | + int cen = realCenOff; |
| 151 | + // Skip past the first CEN header |
| 152 | + int nlen = buffer.getShort(cen + ZipFile.CENNAM); |
| 153 | + cen += ZipFile.CENHDR + nlen; |
| 154 | + |
| 155 | + // Reduce the LOC offset by 4 bytes, the size of the removed signature |
| 156 | + int locOff = cen + ZipFile.CENOFF; |
| 157 | + buffer.putInt(locOff, buffer.getInt(locOff) - Integer.BYTES); |
| 158 | + |
| 159 | + return siglessZip; |
| 160 | + } |
147 | 161 | } |
0 commit comments