Skip to content

Commit 024adf3

Browse files
committed
Added support for writing compressed output files
1 parent 370d2c5 commit 024adf3

File tree

2 files changed

+152
-13
lines changed

2 files changed

+152
-13
lines changed

src/main/java/org/culturegraph/mf/stream/sink/ObjectFileWriter.java

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,17 @@
1515
*/
1616
package org.culturegraph.mf.stream.sink;
1717

18-
import java.io.FileOutputStream;
19-
import java.io.IOException;
20-
import java.io.OutputStreamWriter;
21-
import java.io.Writer;
22-
import java.util.regex.Matcher;
23-
import java.util.regex.Pattern;
24-
25-
import org.culturegraph.mf.exceptions.MetafactureException;
26-
import org.culturegraph.mf.framework.ObjectReceiver;
18+
import java.io.FileOutputStream;
19+
import java.io.IOException;
20+
import java.io.OutputStream;
21+
import java.io.OutputStreamWriter;
22+
import java.io.Writer;
23+
import java.util.regex.Matcher;
24+
import java.util.regex.Pattern;
25+
26+
import org.culturegraph.mf.exceptions.MetafactureException;
27+
import org.culturegraph.mf.framework.ObjectReceiver;
28+
import org.culturegraph.mf.util.FileCompression;
2729

2830

2931
/**
@@ -43,7 +45,8 @@ public final class ObjectFileWriter<T> implements ObjectReceiver<T> {
4345
private int count;
4446
private Writer writer;
4547

46-
private String encoding = "UTF-8";
48+
private String encoding = "UTF-8";
49+
private FileCompression compression = FileCompression.AUTO;
4750

4851
public ObjectFileWriter(final String path) {
4952
this.path = path;
@@ -73,12 +76,36 @@ public String getEncoding() {
7376
public void setEncoding(final String encoding) {
7477
this.encoding = encoding;
7578
}
79+
80+
public FileCompression getCompression() {
81+
return compression;
82+
}
83+
84+
public void setCompression(final FileCompression compression) {
85+
this.compression = compression;
86+
}
87+
88+
public void setCompression(final String compression) {
89+
setCompression(FileCompression.valueOf(compression.toUpperCase()));
90+
}
7691

7792
private void startNewFile() {
7893
final Matcher matcher = VAR_PATTERN.matcher(this.path);
7994
final String path = matcher.replaceAll(String.valueOf(count));
80-
try {
81-
writer = new OutputStreamWriter(new FileOutputStream(path), encoding);
95+
try {
96+
final OutputStream file = new FileOutputStream(path);
97+
try {
98+
final OutputStream compressor = compression.createCompressor(file, path);
99+
try {
100+
writer = new OutputStreamWriter(compressor, encoding);
101+
} catch (IOException e) {
102+
compressor.close();
103+
throw e;
104+
}
105+
} catch (IOException e) {
106+
file.close();
107+
throw e;
108+
}
82109
} catch (IOException e) {
83110
throw new MetafactureException("Error creating file '" + path + "'.", e);
84111
}
@@ -112,6 +139,6 @@ public void closeStream() {
112139
} catch (IOException e) {
113140
throw new MetafactureException(e);
114141
}
115-
}
142+
}
116143

117144
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.sink;
17+
18+
import static org.junit.Assert.assertArrayEquals;
19+
20+
import java.io.File;
21+
import java.io.FileInputStream;
22+
import java.io.IOException;
23+
import java.io.InputStream;
24+
import java.util.Arrays;
25+
26+
import org.apache.commons.io.IOUtils;
27+
import org.culturegraph.mf.util.FileCompression;
28+
import org.junit.Rule;
29+
import org.junit.Test;
30+
import org.junit.rules.TemporaryFolder;
31+
import org.junit.runner.RunWith;
32+
import org.junit.runners.Parameterized;
33+
import org.junit.runners.Parameterized.Parameters;
34+
35+
36+
/**
37+
*
38+
* Tests for file compression in {@link ObjectFileWriter}.
39+
*
40+
* @author Christoph Böhme
41+
*
42+
*/
43+
@RunWith(Parameterized.class)
44+
public final class ObjectFileWriterCompressionTest {
45+
46+
private static final String DATA = "This could have been a remarkable sentence.";
47+
48+
private static final String FILENAME_NONE = "compressed.txt";
49+
private static final String FILENAME_BZ2 = "compressed.txt.bz2";
50+
private static final String FILENAME_BZIP2 = "compressed.txt.bzip2";
51+
private static final String FILENAME_GZ = "compressed.txt.gz";
52+
private static final String FILENAME_GZIP = "compressed.txt.gzip";
53+
private static final String FILENAME_XZ = "compressed.txt.xz";
54+
55+
private static final byte[] MAGIC_BYTES_NONE = { 'T', 'h', 'i', 's' };
56+
private static final byte[] MAGIC_BYTES_BZIP2 = { 'B', 'Z', 'h' };
57+
private static final byte[] MAGIC_BYTES_GZIP = { (byte)0x1f, (byte)0x8b };
58+
private static final byte[] MAGIC_BYTES_XZ = { (byte)0xfd, '7', 'z', 'X', 'Z', (byte)0x00 };
59+
60+
// NO CHECKSTYLE VisibilityModifier FOR 3 LINES:
61+
// JUnit requires rules to be public
62+
@Rule
63+
public TemporaryFolder tempFolder = new TemporaryFolder();
64+
65+
private final String fileName;
66+
private final FileCompression compression;
67+
private final byte[] magicBytes;
68+
69+
public ObjectFileWriterCompressionTest(final String fileName, final FileCompression compression,
70+
final byte[] magicBytes) {
71+
this.fileName = fileName;
72+
this.compression = compression;
73+
this.magicBytes = magicBytes;
74+
}
75+
76+
@Parameters
77+
public static Iterable<Object[]> data() {
78+
return Arrays.asList(new Object[][] {
79+
{ FILENAME_NONE, FileCompression.AUTO, MAGIC_BYTES_NONE },
80+
{ FILENAME_BZ2, FileCompression.AUTO, MAGIC_BYTES_BZIP2 },
81+
{ FILENAME_BZIP2, FileCompression.AUTO, MAGIC_BYTES_BZIP2 },
82+
{ FILENAME_GZ, FileCompression.AUTO, MAGIC_BYTES_GZIP },
83+
{ FILENAME_GZIP, FileCompression.AUTO, MAGIC_BYTES_GZIP },
84+
{ FILENAME_XZ, FileCompression.AUTO, MAGIC_BYTES_XZ },
85+
{ FILENAME_NONE, FileCompression.NONE, MAGIC_BYTES_NONE },
86+
{ FILENAME_BZ2, FileCompression.BZIP2, MAGIC_BYTES_BZIP2 },
87+
{ FILENAME_GZ, FileCompression.GZIP, MAGIC_BYTES_GZIP },
88+
{ FILENAME_XZ, FileCompression.XZ, MAGIC_BYTES_XZ },
89+
});
90+
}
91+
92+
@Test
93+
public void testWriteCompressedFiles() throws IOException {
94+
// This test only looks at the magic byte sequences in the
95+
// files to decide whether a compressed file was written.
96+
97+
final File file = tempFolder.newFile(fileName);
98+
99+
final ObjectFileWriter<String> writer = new ObjectFileWriter<String>(file.getAbsolutePath());
100+
writer.setCompression(compression);
101+
writer.process(DATA);
102+
writer.closeStream();
103+
104+
final InputStream stream = new FileInputStream(file);
105+
final byte[] fileHeader;
106+
try { fileHeader = IOUtils.toByteArray(stream, magicBytes.length); }
107+
finally { stream.close(); }
108+
109+
assertArrayEquals(magicBytes, fileHeader);
110+
}
111+
112+
}

0 commit comments

Comments
 (0)