Skip to content

Commit 85095c0

Browse files
committed
Merge #455 from branch 'addSupportForCompressedFileMapInput' of github.com:metafacture/metafacture-core
2 parents fce4535 + def47b9 commit 85095c0

File tree

7 files changed

+164
-41
lines changed

7 files changed

+164
-41
lines changed

metafacture-io/src/main/java/org/metafacture/io/FileOpener.java

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
*/
4141
@Description("Opens a file.")
4242
@In(String.class)
43-
@Out(java.io.Reader.class)
43+
@Out(Reader.class)
4444
@FluxCommand("open-file")
4545
public final class FileOpener extends DefaultObjectPipe<String, ObjectReceiver<Reader>> {
4646

@@ -66,8 +66,7 @@ public String getEncoding() {
6666
/**
6767
* Sets the encoding used to open the resource.
6868
*
69-
* @param encoding
70-
* new encoding
69+
* @param encoding new encoding
7170
*/
7271
public void setEncoding(final String encoding) {
7372
this.encoding = encoding;
@@ -83,7 +82,7 @@ public FileCompression getCompression() {
8382
}
8483

8584
/**
86-
* * Sets the compression of the file.
85+
* Sets the compression of the file.
8786
*
8887
* @param compression the {@link FileCompression}
8988
*/
@@ -94,7 +93,7 @@ public void setCompression(final FileCompression compression) {
9493
/**
9594
* Sets the compression of the file.
9695
*
97-
* @param compression the name of the compression.
96+
* @param compression the name of the compression
9897
*/
9998
public void setCompression(final String compression) {
10099
setCompression(FileCompression.valueOf(compression.toUpperCase()));
@@ -112,35 +111,52 @@ public boolean getDecompressConcatenated() {
112111
/**
113112
* Flags whether concatenated compressed streams should be decompressed.
114113
*
115-
* @param decompressConcatenated true if file compression should be decompresses
116-
* concatenated
114+
* @param decompressConcatenated true if concatenated compressed streams should be decompressed
117115
*/
118116
public void setDecompressConcatenated(final boolean decompressConcatenated) {
119117
this.decompressConcatenated = decompressConcatenated;
120118
}
121119

122-
@Override
123-
public void process(final String file) {
120+
/**
121+
* Opens a file.
122+
*
123+
* @param file the file
124+
* @return a Reader that decompresses the file and decodes it using the configured encoding
125+
* @throws IOException if an I/O error occurs
126+
*/
127+
public Reader open(final String file) throws IOException {
128+
return open(new FileInputStream(file));
129+
}
130+
131+
/**
132+
* Opens a file stream.
133+
*
134+
* @param stream the stream
135+
* @return a Reader that decompresses the stream and decodes it using the configured encoding
136+
* @throws IOException if an I/O error occurs
137+
*/
138+
public Reader open(final InputStream stream) throws IOException {
124139
try {
125-
final InputStream fileStream = new FileInputStream(file);
140+
final InputStream decompressor = compression.createDecompressor(stream, decompressConcatenated);
126141
try {
127-
final InputStream decompressor = compression.createDecompressor(fileStream, decompressConcatenated);
128-
try {
129-
130-
final Reader reader = new InputStreamReader(new BOMInputStream(
131-
decompressor), encoding);
132-
getReceiver().process(reader);
133-
}
134-
catch (final IOException | MetafactureException e) {
135-
decompressor.close();
136-
throw e;
137-
}
142+
return new InputStreamReader(new BOMInputStream(decompressor), encoding);
138143
}
139144
catch (final IOException | MetafactureException e) {
140-
fileStream.close();
145+
decompressor.close();
141146
throw e;
142147
}
143148
}
149+
catch (final IOException | MetafactureException e) {
150+
stream.close();
151+
throw e;
152+
}
153+
}
154+
155+
@Override
156+
public void process(final String file) {
157+
try {
158+
getReceiver().process(open(file));
159+
}
144160
catch (final IOException e) {
145161
throw new MetafactureException(e);
146162
}

metamorph/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ dependencies {
2222
api project(':metamorph-api')
2323
implementation project(':metafacture-commons')
2424
implementation project(':metafacture-flowcontrol')
25+
implementation project(':metafacture-io')
2526
implementation project(':metafacture-mangling')
2627
implementation project(':metafacture-javaintegration')
2728
implementation 'org.slf4j:slf4j-api:1.7.21'

metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package org.metafacture.metamorph.maps;
1818

19+
import org.metafacture.io.FileOpener;
1920
import org.metafacture.metamorph.api.MorphExecutionException;
2021
import org.metafacture.metamorph.api.helpers.AbstractReadOnlyMap;
2122

@@ -24,11 +25,10 @@
2425
import java.io.FileNotFoundException;
2526
import java.io.IOException;
2627
import java.io.InputStream;
27-
import java.io.InputStreamReader;
28+
import java.io.Reader;
2829
import java.io.UncheckedIOException;
2930
import java.net.MalformedURLException;
3031
import java.net.URL;
31-
import java.nio.charset.StandardCharsets;
3232
import java.util.ArrayList;
3333
import java.util.Collections;
3434
import java.util.HashMap;
@@ -38,8 +38,11 @@
3838
import java.util.regex.Pattern;
3939

4040
/**
41-
* Provides a {@link Map} based on files. Can be one file or a comma separated list of files.
42-
* The files are supposed to be UTF-8 encoded. The default separator is {@code \t}.
41+
* Provides a {@link Map} based on files. Can be a single file or a
42+
* comma-separated list of files.
43+
*
44+
* The default {@link #setEncoding encoding} is UTF-8.
45+
* The default {@link #setSeparator separator} is {@code \t}.
4346
*
4447
* By setting {@link #allowEmptyValues} to {@code true} the values in the
4548
* {@link Map} can be empty thus enabling e.g.
@@ -53,6 +56,7 @@
5356
*/
5457
public final class FileMap extends AbstractReadOnlyMap<String, String> {
5558

59+
private final FileOpener fileOpener = new FileOpener();
5660
private final Map<String, String> map = new HashMap<>();
5761

5862
private Pattern split = Pattern.compile("\t", Pattern.LITERAL);
@@ -100,17 +104,45 @@ public void setFile(final String file) {
100104
Collections.addAll(filenames, file);
101105
}
102106

107+
/**
108+
* Sets the encoding used to open the resource.
109+
*
110+
* @param encoding new encoding
111+
*/
112+
public void setEncoding(final String encoding) {
113+
fileOpener.setEncoding(encoding);
114+
}
115+
116+
/**
117+
* Sets the compression of the file.
118+
*
119+
* @param compression the name of the compression
120+
*/
121+
public void setCompression(final String compression) {
122+
fileOpener.setCompression(compression);
123+
}
124+
125+
/**
126+
* Flags whether concatenated compressed streams should be decompressed.
127+
*
128+
* @param decompressConcatenated true if concatenated compressed streams should be decompressed
129+
*/
130+
public void setDecompressConcatenated(final boolean decompressConcatenated) {
131+
fileOpener.setDecompressConcatenated(decompressConcatenated);
132+
}
133+
103134
private void loadFiles() {
104135
filenames.forEach(this::loadFile);
105136
}
106137

107138
private void loadFile(final String file) {
108139
try (
109140
InputStream stream = openStream(file);
110-
BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))
141+
Reader reader = fileOpener.open(stream);
142+
BufferedReader br = new BufferedReader(reader)
111143
) {
112144
String line;
113-
while ((line = reader.readLine()) != null) {
145+
while ((line = br.readLine()) != null) {
114146
if (line.isEmpty()) {
115147
continue;
116148
}
@@ -127,10 +159,9 @@ private void loadFile(final String file) {
127159

128160
private InputStream openStream(final String file) {
129161
return openAsFile(file)
130-
.orElseGet(() -> openAsResource(file)
131-
.orElseGet(() -> openAsUrl(file)
132-
.orElseThrow(() -> new MorphExecutionException(
133-
"File not found: " + file))));
162+
.orElseGet(() -> openAsResource(file)
163+
.orElseGet(() -> openAsUrl(file)
164+
.orElseThrow(() -> new MorphExecutionException("File not found: " + file))));
134165
}
135166

136167
private Optional<InputStream> openAsFile(final String file) {
@@ -166,7 +197,7 @@ private Optional<InputStream> openAsUrl(final String file) {
166197
/**
167198
* Sets the separator.
168199
*
169-
* <strong>Default value: {@code \t} </strong>
200+
* <strong>Default value: {@code \t}</strong>
170201
*
171202
* @param delimiter the separator
172203
*/

metamorph/src/main/resources/schemata/metamorph.xsd

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,21 @@
602602
<documentation>Allow empty values in Map.</documentation>
603603
</annotation>
604604
</attribute>
605+
<attribute name="compression" type="string" use="optional" default="auto">
606+
<annotation>
607+
<documentation>Sets the compression of the file.</documentation>
608+
</annotation>
609+
</attribute>
610+
<attribute name="decompressConcatenated" type="boolean" use="optional" default="false">
611+
<annotation>
612+
<documentation>Flags whether concatenated compressed streams should be decompressed.</documentation>
613+
</annotation>
614+
</attribute>
615+
<attribute name="encoding" type="string" use="optional" default="UTF-8">
616+
<annotation>
617+
<documentation>Sets the encoding used to open the resource.</documentation>
618+
</annotation>
619+
</attribute>
605620
<attribute name="files" type="string" use="required">
606621
<annotation>
607622
<documentation>Filename(s) referencing the lookup table(s). Can be one

metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,12 @@ public final class FileMapTest {
4646
" </data>" +
4747
"</rules>" +
4848
"<maps>" +
49-
" <filemap name='map1' files='org/metafacture/metamorph/maps/" +
50-
"file-map-test.txt' %s/>" +
49+
" <filemap name='map1' files='org/metafacture/metamorph/maps/%s' %s/>" +
5150
"</maps>";
5251

5352
@Test
5453
public void shouldLookupValuesInFileBasedMap() {
55-
assertMorph(receiver, String.format(MORPH, "lookup in", ""),
54+
assertMorph(receiver, buildMorph("lookup in", ""),
5655
i -> {
5756
i.startRecord("1");
5857
i.literal("1", "gw");
@@ -70,7 +69,7 @@ public void shouldLookupValuesInFileBasedMap() {
7069

7170
@Test
7271
public void shouldWhitelistValuesInFileBasedMap() {
73-
assertMorph(receiver, String.format(MORPH, "whitelist map", ""),
72+
assertMorph(receiver, buildMorph("whitelist map", ""),
7473
i -> {
7574
i.startRecord("1");
7675
i.literal("1", "gw");
@@ -89,7 +88,7 @@ public void shouldWhitelistValuesInFileBasedMap() {
8988

9089
@Test
9190
public void shouldReplaceValuesUsingFileBasedMap() {
92-
assertMorph(receiver, String.format(MORPH, "setreplace map", ""),
91+
assertMorph(receiver, buildMorph("setreplace map", ""),
9392
i -> {
9493
i.startRecord("1");
9594
i.literal("1", "gw-fj: 1:1");
@@ -107,7 +106,7 @@ public void shouldReplaceValuesUsingFileBasedMap() {
107106

108107
@Test
109108
public void shouldReplaceCommaSeparatedValuesUsingFileBasedMapSetting() {
110-
assertMorph(receiver, String.format(MORPH, "setreplace map", "separator=\",\""),
109+
assertMorph(receiver, buildMorph("setreplace map", "separator=\",\""),
111110
i -> {
112111
i.startRecord("1");
113112
i.literal("1", "gw");
@@ -125,7 +124,7 @@ public void shouldReplaceCommaSeparatedValuesUsingFileBasedMapSetting() {
125124

126125
@Test
127126
public void shouldReplaceEmptyValuesUsingFileBasedMapSetting() {
128-
assertMorph(receiver, String.format(MORPH, "setreplace map", "allowEmptyValues=\"true\""),
127+
assertMorph(receiver, buildMorph("setreplace map", "allowEmptyValues=\"true\""),
129128
i -> {
130129
i.startRecord("1");
131130
i.literal("1", "zz");
@@ -141,7 +140,7 @@ public void shouldReplaceEmptyValuesUsingFileBasedMapSetting() {
141140

142141
@Test
143142
public void shouldNotReplaceEmptyValuesUsingFileBasedMapSetting() {
144-
assertMorph(receiver, String.format(MORPH, "setreplace map", ""),
143+
assertMorph(receiver, buildMorph("setreplace map", ""),
145144
i -> {
146145
i.startRecord("1");
147146
i.literal("1", "zz");
@@ -154,4 +153,65 @@ public void shouldNotReplaceEmptyValuesUsingFileBasedMapSetting() {
154153
}
155154
);
156155
}
156+
157+
@Test
158+
public void shouldLookupValuesInGzipFileMap() {
159+
assertMorph(receiver, buildMorph("lookup in", "file-map-test.txt.gz", ""),
160+
i -> {
161+
i.startRecord("1");
162+
i.literal("1", "gw");
163+
i.literal("1", "fj");
164+
i.endRecord();
165+
},
166+
o -> {
167+
o.get().startRecord("1");
168+
o.get().literal("1", "Germany");
169+
o.get().literal("1", "Fiji");
170+
o.get().endRecord();
171+
}
172+
);
173+
}
174+
175+
@Test
176+
public void shouldNotLookupValuesInBlockedGzipFileMapWithoutDecompressConcatenated() {
177+
assertMorph(receiver, buildMorph("lookup in", "file-map-test.txt.bgzf", ""),
178+
i -> {
179+
i.startRecord("1");
180+
i.literal("1", "gw");
181+
i.literal("1", "fj");
182+
i.endRecord();
183+
},
184+
o -> {
185+
o.get().startRecord("1");
186+
o.get().endRecord();
187+
}
188+
);
189+
}
190+
191+
@Test
192+
public void shouldLookupValuesInBlockedGzipFileMap() {
193+
assertMorph(receiver, buildMorph("lookup in", "file-map-test.txt.bgzf", "decompressConcatenated=\"true\""),
194+
i -> {
195+
i.startRecord("1");
196+
i.literal("1", "gw");
197+
i.literal("1", "fj");
198+
i.endRecord();
199+
},
200+
o -> {
201+
o.get().startRecord("1");
202+
o.get().literal("1", "Germany");
203+
o.get().literal("1", "Fiji");
204+
o.get().endRecord();
205+
}
206+
);
207+
}
208+
209+
private String buildMorph(final String data, final String options) {
210+
return buildMorph(data, "file-map-test.txt", options);
211+
}
212+
213+
private String buildMorph(final String data, final String map, final String options) {
214+
return String.format(MORPH, data, map, options);
215+
}
216+
157217
}
Binary file not shown.

0 commit comments

Comments
 (0)