Skip to content

Commit 6e92d63

Browse files
committed
Merge #425 from '420-allowEmptyValuesInSetreplaceMap' of https://github.com/metafacture/metafacture-core
2 parents 63496ad + d528ac9 commit 6e92d63

File tree

5 files changed

+118
-26
lines changed

5 files changed

+118
-26
lines changed

.editorconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,8 @@ indent_size = 2
3434
[metafacture-io/src/test/resources/org/metafacture/io/compressed.txt]
3535
insert_final_newline = false
3636

37+
[metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt]
38+
trim_trailing_whitespace = false
39+
3740
[metafacture-runner/src/main/dist/config/java-options.conf]
3841
end_of_line = crlf

metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2013, 2014 Deutsche Nationalbibliothek
2+
* Copyright 2013, 2014, 2021 Deutsche Nationalbibliothek et al
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@
2929
import java.net.MalformedURLException;
3030
import java.net.URL;
3131
import java.nio.charset.StandardCharsets;
32+
import java.util.ArrayList;
3233
import java.util.Collections;
3334
import java.util.HashMap;
3435
import java.util.Map;
@@ -37,9 +38,16 @@
3738
import java.util.regex.Pattern;
3839

3940
/**
40-
* Provides a {@link Map} based on a file. The file is supposed to be UTF-8
41-
* encoded. The default separator is {@code \t}. <strong>Important:</strong>
42-
* Lines that are not split in two parts by the separator are ignored!
41+
* Provides a {@link Map} based on files. Can be one file or a comma-separated list of files.
42+
* The files are supposed to be UTF-8 encoded. The default separator is {@code \t}.
43+
*
44+
* By calling {@link #setAllowEmptyValues(boolean)} with {@code true} the values in the
45+
* {@link Map} can be empty thus enabling e.g.
46+
* {@link org.metafacture.metamorph.functions.SetReplace} to remove matching
47+
* keys.
48+
*
49+
* <strong>Important:</strong> All other lines that are not split in two parts
50+
* by the separator are ignored!
4351
*
4452
* @author Markus Michael Geipel
4553
*/
@@ -48,34 +56,55 @@ public final class FileMap extends AbstractReadOnlyMap<String, String> {
4856
private final Map<String, String> map = new HashMap<>();
4957

5058
private Pattern split = Pattern.compile("\t", Pattern.LITERAL);
59+
private boolean allowEmptyValues;
60+
private boolean isUninitialized = true;
61+
private ArrayList<String> filenames = new ArrayList<>();
5162

5263
/**
5364
* Creates an instance of {@link FileMap}.
5465
*/
5566
public FileMap() {
5667
}
5768

69+
private void init() {
70+
loadFiles();
71+
isUninitialized = false;
72+
}
73+
74+
/**
75+
* Sets whether to allow empty values in the {@link Map} or ignore these
76+
* entries.
77+
*
78+
* <strong>Default value: false</strong>
79+
*
80+
* @param allowEmptyValues true if empty values in the Map are allowed
81+
*/
82+
public void setAllowEmptyValues(final boolean allowEmptyValues) {
83+
this.allowEmptyValues = allowEmptyValues;
84+
}
85+
5886
/**
59-
* Sets a comma separated list of files which are then passed to
60-
* {@link #setFile}.
87+
* Sets a comma-separated list of files which provide the {@link Map}.
6188
*
6289
* @param files a comma separated list of files
6390
*/
6491
public void setFiles(final String files) {
65-
final String[] parts = files.split("\\s*,\\s*");
66-
for (final String part : parts) {
67-
setFile(part);
68-
}
92+
Collections.addAll(filenames, files.split("\\s*,\\s*"));
6993
}
7094

7195
/**
72-
* Provides a {@link Map} based on a file. The file is supposed to be UTF-8
73-
* encoded. The default separator is {@code \t}. <strong>Important:</strong>
74-
* Lines that are not split in two parts by the separator are ignored!
75-
*
96+
* Sets a file which provides the {@link Map}.
7697
* @param file the file
7798
*/
7899
public void setFile(final String file) {
100+
Collections.addAll(filenames, file);
101+
}
102+
103+
private void loadFiles() {
104+
filenames.forEach(this::loadFile);
105+
}
106+
107+
private void loadFile(final String file) {
79108
try (
80109
InputStream stream = openStream(file);
81110
BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))
@@ -85,7 +114,7 @@ public void setFile(final String file) {
85114
if (line.isEmpty()) {
86115
continue;
87116
}
88-
final String[] parts = split.split(line);
117+
final String[] parts = allowEmptyValues ? split.split(line, -1) : split.split(line);
89118
if (parts.length == 2) {
90119
map.put(parts[0], parts[1]);
91120
}
@@ -147,11 +176,17 @@ public void setSeparator(final String delimiter) {
147176

148177
@Override
149178
public String get(final Object key) {
179+
if (isUninitialized) {
180+
init();
181+
}
150182
return map.get(key);
151183
}
152184

153185
@Override
154186
public Set<String> keySet() {
187+
if (isUninitialized) {
188+
init();
189+
}
155190
return Collections.unmodifiableSet(map.keySet());
156191
}
157192

metamorph/src/main/resources/schemata/metamorph.xsd

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,6 @@
587587
</complexType>
588588
</element>
589589

590-
591590
<element name="filemap">
592591
<annotation>
593592
<documentation>Lookup table defined by text files</documentation>
@@ -598,16 +597,21 @@
598597
<documentation>Unique name of the lookup table</documentation>
599598
</annotation>
600599
</attribute>
600+
<attribute name="allowEmptyValues" type="boolean" use="optional" default="false">
601+
<annotation>
602+
<documentation>Allow empty values in Map.</documentation>
603+
</annotation>
604+
</attribute>
601605
<attribute name="files" type="string" use="required">
602606
<annotation>
603-
<documentation>Filenames</documentation>
607+
<documentation>Filename(s) referencing the lookup table(s). Can be one
608+
filename or a comma-separated list of filenames.</documentation>
604609
</annotation>
605610
</attribute>
606-
<attribute name="separator" type="string" use="optional"
607-
default="\t">
611+
<attribute name="separator" type="string" use="optional" default="&#09;">
608612
<annotation>
609-
<documentation>String used in the files to separate key from value.
610-
</documentation>
613+
<documentation>String used in the files to separate keys from values.
614+
The default separator is the tab character.</documentation>
611615
</annotation>
612616
</attribute>
613617
<attribute ref="xml:base" />
@@ -795,7 +799,7 @@
795799

796800
<element name="setreplace">
797801
<annotation>
798-
<documentation>Relace strings based on a replacement table.
802+
<documentation>Replace strings based on a replacement table.
799803
</documentation>
800804
</annotation>
801805
<complexType>

metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,12 @@ public final class FileMapTest {
4747
"</rules>" +
4848
"<maps>" +
4949
" <filemap name='map1' files='org/metafacture/metamorph/maps/" +
50-
"file-map-test.txt' />" +
50+
"file-map-test.txt' %s/>" +
5151
"</maps>";
5252

5353
@Test
5454
public void shouldLookupValuesInFileBasedMap() {
55-
assertMorph(receiver, String.format(MORPH, "lookup in"),
55+
assertMorph(receiver, String.format(MORPH, "lookup in", ""),
5656
i -> {
5757
i.startRecord("1");
5858
i.literal("1", "gw");
@@ -70,7 +70,7 @@ public void shouldLookupValuesInFileBasedMap() {
7070

7171
@Test
7272
public void shouldWhitelistValuesInFileBasedMap() {
73-
assertMorph(receiver, String.format(MORPH, "whitelist map"),
73+
assertMorph(receiver, String.format(MORPH, "whitelist map", ""),
7474
i -> {
7575
i.startRecord("1");
7676
i.literal("1", "gw");
@@ -89,7 +89,7 @@ public void shouldWhitelistValuesInFileBasedMap() {
8989

9090
@Test
9191
public void shouldReplaceValuesUsingFileBasedMap() {
92-
assertMorph(receiver, String.format(MORPH, "setreplace map"),
92+
assertMorph(receiver, String.format(MORPH, "setreplace map", ""),
9393
i -> {
9494
i.startRecord("1");
9595
i.literal("1", "gw-fj: 1:1");
@@ -105,4 +105,53 @@ public void shouldReplaceValuesUsingFileBasedMap() {
105105
);
106106
}
107107

108+
@Test
109+
public void shouldReplaceCommaSeparatedValuesUsingFileBasedMapSetting() {
110+
assertMorph(receiver, String.format(MORPH, "setreplace map", "separator=\",\""),
111+
i -> {
112+
i.startRecord("1");
113+
i.literal("1", "gw");
114+
i.literal("1", "ry\tRyukyuIslands");
115+
i.endRecord();
116+
},
117+
o -> {
118+
o.get().startRecord("1");
119+
o.get().literal("1", "gw");
120+
o.get().literal("1", "Southern");
121+
o.get().endRecord();
122+
}
123+
);
124+
}
125+
126+
@Test
127+
public void shouldReplaceEmptyValuesUsingFileBasedMapSetting() {
128+
assertMorph(receiver, String.format(MORPH, "setreplace map", "allowEmptyValues=\"true\""),
129+
i -> {
130+
i.startRecord("1");
131+
i.literal("1", "zz");
132+
i.endRecord();
133+
},
134+
o -> {
135+
o.get().startRecord("1");
136+
o.get().literal("1", "");
137+
o.get().endRecord();
138+
}
139+
);
140+
}
141+
142+
@Test
143+
public void shouldNotReplaceEmptyValuesUsingFileBasedMapSetting() {
144+
assertMorph(receiver, String.format(MORPH, "setreplace map", ""),
145+
i -> {
146+
i.startRecord("1");
147+
i.literal("1", "zz");
148+
i.endRecord();
149+
},
150+
o -> {
151+
o.get().startRecord("1");
152+
o.get().literal("1", "zz");
153+
o.get().endRecord();
154+
}
155+
);
156+
}
108157
}

metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,3 +378,4 @@ ykc YukonTerritory
378378
ys Yemen(People'sDemocraticRepublic)
379379
yu SerbiaandMontenegro
380380
za Zambia
381+
zz

0 commit comments

Comments
 (0)