Skip to content

Commit eee6759

Browse files
committed
Draft commit for reducing read IOPS in temporary package parts.
1 parent 7fbc3a4 commit eee6759

File tree

6 files changed

+494
-4
lines changed

6 files changed

+494
-4
lines changed

poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/ZipPackage.java

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ Licensed to the Apache Software Foundation (ASF) under one or more
2727
import java.io.OutputStream;
2828
import java.nio.file.Files;
2929
import java.nio.file.Path;
30-
import java.nio.file.Paths;
3130
import java.util.Collections;
3231
import java.util.List;
3332
import java.util.stream.Collectors;
@@ -52,7 +51,6 @@ Licensed to the Apache Software Foundation (ASF) under one or more
5251
import org.apache.poi.openxml4j.util.ZipFileZipEntrySource;
5352
import org.apache.poi.openxml4j.util.ZipInputStreamZipEntrySource;
5453
import org.apache.poi.util.IOUtils;
55-
import org.apache.poi.util.TempFile;
5654

5755
/**
5856
* Physical zip package.
@@ -62,6 +60,7 @@ public final class ZipPackage extends OPCPackage {
6260
private static final String SETTINGS_XML = "settings.xml";
6361
private static boolean useTempFilePackageParts = false;
6462
private static boolean encryptTempFilePackageParts = false;
63+
private static boolean bufferTempFilePackageParts = false;
6564

6665
private static final Logger LOG = LogManager.getLogger(ZipPackage.class);
6766

@@ -85,6 +84,13 @@ public static void setEncryptTempFilePackageParts(boolean encryptTempFiles) {
8584
encryptTempFilePackageParts = encryptTempFiles;
8685
}
8786

87+
/**
88+
* @param bufferTempFiles whether to buffer package part temp files
89+
*/
90+
public static void setBufferTempFilePackageParts(boolean bufferTempFiles) {
91+
bufferTempFilePackageParts = bufferTempFiles;
92+
}
93+
8894
/**
8995
* @return whether package part data is stored in temp files to save memory
9096
*/
@@ -99,6 +105,13 @@ public static boolean encryptTempFilePackageParts() {
99105
return encryptTempFilePackageParts;
100106
}
101107

108+
/**
109+
* @return whether package part temp files are buffered
110+
*/
111+
public static boolean bufferTempFilePackageParts() {
112+
return bufferTempFilePackageParts;
113+
}
114+
102115
/**
103116
* Constructor. Creates a new, empty ZipPackage.
104117
*/
@@ -422,8 +435,16 @@ protected PackagePart createPartImpl(PackagePartName partName,
422435
try {
423436
if (useTempFilePackageParts) {
424437
if (encryptTempFilePackageParts) {
425-
return new EncryptedTempFilePackagePart(this, partName, contentType, loadRelationships);
426-
} else {
438+
if (bufferTempFilePackageParts) {
439+
return new BufferedEncryptedTempFilePackagePart(this, partName, contentType, loadRelationships);
440+
} else {
441+
return new EncryptedTempFilePackagePart(this, partName, contentType, loadRelationships);
442+
}
443+
}
444+
else if (bufferTempFilePackageParts) {
445+
return new BufferedTempFilePackagePart(this, partName, contentType, loadRelationships);
446+
}
447+
else {
427448
return new TempFilePackagePart(this, partName, contentType, loadRelationships);
428449
}
429450
} else {
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
/* ====================================================================
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
==================================================================== */
17+
18+
package org.apache.poi.openxml4j.opc.internal;
19+
20+
import org.apache.logging.log4j.LogManager;
21+
import org.apache.logging.log4j.Logger;
22+
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
23+
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
24+
import org.apache.poi.openxml4j.opc.OPCPackage;
25+
import org.apache.poi.openxml4j.opc.PackagePart;
26+
import org.apache.poi.openxml4j.opc.PackagePartName;
27+
import org.apache.poi.openxml4j.opc.internal.marshallers.ZipPartMarshaller;
28+
import org.apache.poi.poifs.crypt.temp.EncryptedTempData;
29+
import org.apache.poi.util.Beta;
30+
import org.apache.poi.util.IOUtils;
31+
32+
import java.io.BufferedInputStream;
33+
import java.io.BufferedOutputStream;
34+
import java.io.FileInputStream;
35+
import java.io.FileOutputStream;
36+
import java.io.IOException;
37+
import java.io.InputStream;
38+
import java.io.OutputStream;
39+
40+
/**
41+
* (Experimental) Buffered Encrypted Temp File version of a package part.
42+
*
43+
* @since POI 5.2.4
44+
*/
45+
@Beta
46+
public final class BufferedEncryptedTempFilePackagePart extends PackagePart {
47+
private static int fileBufferSize = 1024 * 1024; // 1 MB
48+
private static final Logger LOG = LogManager.getLogger(BufferedEncryptedTempFilePackagePart.class);
49+
50+
/**
51+
* Storage for the part data.
52+
*/
53+
private EncryptedTempData tempFile;
54+
55+
/**
56+
* Constructor.
57+
*
58+
* @param pack
59+
* The owner package.
60+
* @param partName
61+
* The part name.
62+
* @param contentType
63+
* The content type.
64+
* @throws InvalidFormatException
65+
* If the specified URI is not OPC compliant.
66+
* @throws IOException
67+
* If temp file cannot be created.
68+
*/
69+
public BufferedEncryptedTempFilePackagePart(OPCPackage pack, PackagePartName partName,
70+
String contentType) throws InvalidFormatException, IOException {
71+
this(pack, partName, contentType, true);
72+
}
73+
74+
/**
75+
* Constructor.
76+
*
77+
* @param pack
78+
* The owner package.
79+
* @param partName
80+
* The part name.
81+
* @param contentType
82+
* The content type.
83+
* @param loadRelationships
84+
* Specify if the relationships will be loaded.
85+
* @throws InvalidFormatException
86+
* If the specified URI is not OPC compliant.
87+
* @throws IOException
88+
* If temp file cannot be created.
89+
*/
90+
public BufferedEncryptedTempFilePackagePart(OPCPackage pack, PackagePartName partName,
91+
String contentType, boolean loadRelationships)
92+
throws InvalidFormatException, IOException {
93+
super(pack, partName, new ContentType(contentType), loadRelationships);
94+
tempFile = new EncryptedTempData();
95+
}
96+
97+
/**
98+
* Allows configuration of read/write buffer sizes of temp file package parts
99+
*
100+
* @param bufferSize
101+
* Size of the buffer used for input/output streams.
102+
*/
103+
public static void setBufferSize(int bufferSize) {
104+
fileBufferSize = bufferSize;
105+
}
106+
107+
@Override
108+
protected InputStream getInputStreamImpl() throws IOException {
109+
return new BufferedInputStream(tempFile.getInputStream(), fileBufferSize);
110+
}
111+
112+
@Override
113+
protected OutputStream getOutputStreamImpl() throws IOException {
114+
return new BufferedOutputStream(tempFile.getOutputStream(), fileBufferSize);
115+
}
116+
117+
@Override
118+
public long getSize() {
119+
return tempFile.getByteCount();
120+
}
121+
122+
@Override
123+
public void clear() {
124+
try(OutputStream os = getOutputStreamImpl()) {
125+
os.write(new byte[0]);
126+
} catch (IOException e) {
127+
LOG.atWarn().log("Failed to clear data in temp file", e);
128+
}
129+
}
130+
131+
@Override
132+
public boolean save(OutputStream os) throws OpenXML4JException {
133+
return new ZipPartMarshaller().marshall(this, os);
134+
}
135+
136+
@Override
137+
public boolean load(InputStream is) throws InvalidFormatException {
138+
try (OutputStream os = getOutputStreamImpl()) {
139+
IOUtils.copy(is, os);
140+
} catch(IOException e) {
141+
throw new InvalidFormatException(e.getMessage(), e);
142+
}
143+
144+
// All done
145+
return true;
146+
}
147+
148+
@Override
149+
public void close() {
150+
tempFile.dispose();
151+
}
152+
153+
@Override
154+
public void flush() {
155+
// Do nothing
156+
}
157+
}
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
/* ====================================================================
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
==================================================================== */
17+
18+
package org.apache.poi.openxml4j.opc.internal;
19+
20+
import org.apache.logging.log4j.LogManager;
21+
import org.apache.logging.log4j.Logger;
22+
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
23+
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
24+
import org.apache.poi.openxml4j.opc.OPCPackage;
25+
import org.apache.poi.openxml4j.opc.PackagePart;
26+
import org.apache.poi.openxml4j.opc.PackagePartName;
27+
import org.apache.poi.openxml4j.opc.internal.marshallers.ZipPartMarshaller;
28+
import org.apache.poi.util.Beta;
29+
import org.apache.poi.util.IOUtils;
30+
import org.apache.poi.util.TempFile;
31+
32+
import java.io.BufferedInputStream;
33+
import java.io.BufferedOutputStream;
34+
import java.io.File;
35+
import java.io.FileInputStream;
36+
import java.io.FileOutputStream;
37+
import java.io.IOException;
38+
import java.io.InputStream;
39+
import java.io.OutputStream;
40+
41+
/**
42+
* (Experimental) Buffered Temp File version of a package part.
43+
*
44+
* @since POI 5.4.2
45+
*/
46+
@Beta
47+
public final class BufferedTempFilePackagePart extends PackagePart {
48+
private static int fileBufferSize = 1024 * 1024; // 1 MB
49+
private static final Logger LOG = LogManager.getLogger(BufferedTempFilePackagePart.class);
50+
51+
/**
52+
* Storage for the part data.
53+
*/
54+
private File tempFile;
55+
56+
/**
57+
* Constructor.
58+
*
59+
* @param pack
60+
* The owner package.
61+
* @param partName
62+
* The part name.
63+
* @param contentType
64+
* The content type.
65+
* @throws InvalidFormatException
66+
* If the specified URI is not OPC compliant.
67+
* @throws IOException
68+
* If temp file cannot be created.
69+
*/
70+
public BufferedTempFilePackagePart(OPCPackage pack, PackagePartName partName,
71+
String contentType) throws InvalidFormatException, IOException {
72+
this(pack, partName, contentType, true);
73+
}
74+
75+
/**
76+
* Constructor.
77+
*
78+
* @param pack
79+
* The owner package.
80+
* @param partName
81+
* The part name.
82+
* @param contentType
83+
* The content type.
84+
* @param loadRelationships
85+
* Specify if the relationships will be loaded.
86+
* @throws InvalidFormatException
87+
* If the specified URI is not OPC compliant.
88+
* @throws IOException
89+
* If temp file cannot be created.
90+
*/
91+
public BufferedTempFilePackagePart(OPCPackage pack, PackagePartName partName,
92+
String contentType, boolean loadRelationships)
93+
throws InvalidFormatException, IOException {
94+
super(pack, partName, new ContentType(contentType), loadRelationships);
95+
tempFile = TempFile.createTempFile("poi-package-part", ".tmp");
96+
}
97+
98+
/**
99+
* Allows configuration of read/write buffer sizes of temp file package parts
100+
*
101+
* @param bufferSize
102+
* Size of the buffer used for input/output streams.
103+
*/
104+
public static void setBufferSize(int bufferSize) {
105+
fileBufferSize = bufferSize;
106+
}
107+
108+
@Override
109+
protected InputStream getInputStreamImpl() throws IOException {
110+
return new BufferedInputStream(new FileInputStream(tempFile), fileBufferSize);
111+
}
112+
113+
@Override
114+
protected OutputStream getOutputStreamImpl() throws IOException {
115+
return new BufferedOutputStream(new FileOutputStream(tempFile), fileBufferSize);
116+
}
117+
118+
@Override
119+
public long getSize() {
120+
return tempFile.length();
121+
}
122+
123+
@Override
124+
public void clear() {
125+
try(OutputStream os = getOutputStreamImpl()) {
126+
os.write(new byte[0]);
127+
} catch (IOException e) {
128+
LOG.atWarn().log("Failed to clear data in temp file", e);
129+
}
130+
}
131+
132+
@Override
133+
public boolean save(OutputStream os) throws OpenXML4JException {
134+
return new ZipPartMarshaller().marshall(this, os);
135+
}
136+
137+
@Override
138+
public boolean load(InputStream is) throws InvalidFormatException {
139+
try (OutputStream os = getOutputStreamImpl()) {
140+
IOUtils.copy(is, os);
141+
} catch(IOException e) {
142+
throw new InvalidFormatException(e.getMessage(), e);
143+
}
144+
145+
// All done
146+
return true;
147+
}
148+
149+
@Override
150+
public void close() {
151+
if (!tempFile.delete()) {
152+
LOG.atInfo().log("Failed to delete temp file; may already have been closed and deleted");
153+
}
154+
}
155+
156+
@Override
157+
public void flush() {
158+
// Do nothing
159+
}
160+
}

0 commit comments

Comments
 (0)