Skip to content

Commit d528ac9

Browse files
committed
Allow empty values in setreplace map (#420)
Allowing empty values in the map enables the SetReplacer to remove matching keys. Also fixes setting the separator by deferring the loading of the map to an init() step, so that the files are not read before all settings have been taken into account. Adds a rule to .editorconfig to allow the test map to have trailing tabs.
1 parent 63496ad commit d528ac9

File tree

5 files changed

+118
-26
lines changed

5 files changed

+118
-26
lines changed

.editorconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,8 @@ indent_size = 2
3434
[metafacture-io/src/test/resources/org/metafacture/io/compressed.txt]
3535
insert_final_newline = false
3636

37+
[metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt]
38+
trim_trailing_whitespace = false
39+
3740
[metafacture-runner/src/main/dist/config/java-options.conf]
3841
end_of_line = crlf

metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2013, 2014 Deutsche Nationalbibliothek
2+
* Copyright 2013, 2014, 2021 Deutsche Nationalbibliothek et al
33
*
44
* Licensed under the Apache License, Version 2.0 the "License";
55
* you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@
2929
import java.net.MalformedURLException;
3030
import java.net.URL;
3131
import java.nio.charset.StandardCharsets;
32+
import java.util.ArrayList;
3233
import java.util.Collections;
3334
import java.util.HashMap;
3435
import java.util.Map;
@@ -37,9 +38,16 @@
3738
import java.util.regex.Pattern;
3839

3940
/**
40-
* Provides a {@link Map} based on a file. The file is supposed to be UTF-8
41-
* encoded. The default separator is {@code \t}. <strong>Important:</strong>
42-
* Lines that are not split in two parts by the separator are ignored!
41+
* Provides a {@link Map} based on files. Can be one file or a comma separated list of files.
42+
* The files are supposed to be UTF-8 encoded. The default separator is {@code \t}.
43+
*
44+
* By setting {@link #allowEmptyValues} to {@code true} the values in the
45+
* {@link Map} can be empty thus enabling e.g.
46+
* {@link org.metafacture.metamorph.functions.SetReplace} to remove matching
47+
* keys.
48+
*
49+
* <strong>Important:</strong> All other lines that are not split in two parts
50+
* by the separator are ignored!
4351
*
4452
* @author Markus Michael Geipel
4553
*/
@@ -48,34 +56,55 @@ public final class FileMap extends AbstractReadOnlyMap<String, String> {
4856
private final Map<String, String> map = new HashMap<>();
4957

5058
private Pattern split = Pattern.compile("\t", Pattern.LITERAL);
59+
private boolean allowEmptyValues;
60+
private boolean isUninitialized = true;
61+
private ArrayList<String> filenames = new ArrayList<>();
5162

5263
/**
5364
* Creates an instance of {@link FileMap}.
5465
*/
5566
public FileMap() {
5667
}
5768

69+
private void init() {
70+
loadFiles();
71+
isUninitialized = false;
72+
}
73+
74+
/**
75+
* Sets whether to allow empty values in the {@link Map} or ignore these
76+
* entries.
77+
*
78+
* <strong>Default value: false </strong>
79+
*
80+
* @param allowEmptyValues true if empty values in the Map are allowed
81+
*/
82+
public void setAllowEmptyValues(final boolean allowEmptyValues) {
83+
this.allowEmptyValues = allowEmptyValues;
84+
}
85+
5886
/**
59-
* Sets a comma separated list of files which are then passed to
60-
* {@link #setFile}.
87+
* Sets a comma separated list of files which provides the {@link Map}.
6188
*
6289
* @param files a comma separated list of files
6390
*/
6491
public void setFiles(final String files) {
65-
final String[] parts = files.split("\\s*,\\s*");
66-
for (final String part : parts) {
67-
setFile(part);
68-
}
92+
Collections.addAll(filenames, files.split("\\s*,\\s*"));
6993
}
7094

7195
/**
72-
* Provides a {@link Map} based on a file. The file is supposed to be UTF-8
73-
* encoded. The default separator is {@code \t}. <strong>Important:</strong>
74-
* Lines that are not split in two parts by the separator are ignored!
75-
*
96+
* Sets a file which provides the {@link Map}.
7697
* @param file the file
7798
*/
7899
public void setFile(final String file) {
100+
Collections.addAll(filenames, file);
101+
}
102+
103+
private void loadFiles() {
104+
filenames.forEach(this::loadFile);
105+
}
106+
107+
private void loadFile(final String file) {
79108
try (
80109
InputStream stream = openStream(file);
81110
BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))
@@ -85,7 +114,7 @@ public void setFile(final String file) {
85114
if (line.isEmpty()) {
86115
continue;
87116
}
88-
final String[] parts = split.split(line);
117+
final String[] parts = allowEmptyValues ? split.split(line, -1) : split.split(line);
89118
if (parts.length == 2) {
90119
map.put(parts[0], parts[1]);
91120
}
@@ -147,11 +176,17 @@ public void setSeparator(final String delimiter) {
147176

148177
@Override
149178
public String get(final Object key) {
179+
if (isUninitialized) {
180+
init();
181+
}
150182
return map.get(key);
151183
}
152184

153185
@Override
154186
public Set<String> keySet() {
187+
if (isUninitialized) {
188+
init();
189+
}
155190
return Collections.unmodifiableSet(map.keySet());
156191
}
157192

metamorph/src/main/resources/schemata/metamorph.xsd

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,6 @@
587587
</complexType>
588588
</element>
589589

590-
591590
<element name="filemap">
592591
<annotation>
593592
<documentation>Lookup table defined by text files</documentation>
@@ -598,16 +597,21 @@
598597
<documentation>Unique name of the lookup table</documentation>
599598
</annotation>
600599
</attribute>
600+
<attribute name="allowEmptyValues" type="boolean" use="optional" default="false">
601+
<annotation>
602+
<documentation>Allow empty values in Map.</documentation>
603+
</annotation>
604+
</attribute>
601605
<attribute name="files" type="string" use="required">
602606
<annotation>
603-
<documentation>Filenames</documentation>
607+
<documentation>Filename(s) referencing the lookup table(s). Can be one
608+
filename or a comma separated list of filenames.</documentation>
604609
</annotation>
605610
</attribute>
606-
<attribute name="separator" type="string" use="optional"
607-
default="\t">
611+
<attribute name="separator" type="string" use="optional" default="&#09;">
608612
<annotation>
609-
<documentation>String used in the files to separate key from value.
610-
</documentation>
613+
<documentation>String used in the files to separate keys from values.
614+
The default separator is the tabulator. </documentation>
611615
</annotation>
612616
</attribute>
613617
<attribute ref="xml:base" />
@@ -795,7 +799,7 @@
795799

796800
<element name="setreplace">
797801
<annotation>
798-
<documentation>Relace strings based on a replacement table.
802+
<documentation>Replace strings based on a replacement table.
799803
</documentation>
800804
</annotation>
801805
<complexType>

metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,12 @@ public final class FileMapTest {
4747
"</rules>" +
4848
"<maps>" +
4949
" <filemap name='map1' files='org/metafacture/metamorph/maps/" +
50-
"file-map-test.txt' />" +
50+
"file-map-test.txt' %s/>" +
5151
"</maps>";
5252

5353
@Test
5454
public void shouldLookupValuesInFileBasedMap() {
55-
assertMorph(receiver, String.format(MORPH, "lookup in"),
55+
assertMorph(receiver, String.format(MORPH, "lookup in", ""),
5656
i -> {
5757
i.startRecord("1");
5858
i.literal("1", "gw");
@@ -70,7 +70,7 @@ public void shouldLookupValuesInFileBasedMap() {
7070

7171
@Test
7272
public void shouldWhitelistValuesInFileBasedMap() {
73-
assertMorph(receiver, String.format(MORPH, "whitelist map"),
73+
assertMorph(receiver, String.format(MORPH, "whitelist map", ""),
7474
i -> {
7575
i.startRecord("1");
7676
i.literal("1", "gw");
@@ -89,7 +89,7 @@ public void shouldWhitelistValuesInFileBasedMap() {
8989

9090
@Test
9191
public void shouldReplaceValuesUsingFileBasedMap() {
92-
assertMorph(receiver, String.format(MORPH, "setreplace map"),
92+
assertMorph(receiver, String.format(MORPH, "setreplace map", ""),
9393
i -> {
9494
i.startRecord("1");
9595
i.literal("1", "gw-fj: 1:1");
@@ -105,4 +105,53 @@ public void shouldReplaceValuesUsingFileBasedMap() {
105105
);
106106
}
107107

108+
@Test
109+
public void shouldReplaceCommaSeparatedValuesUsingFileBasedMapSetting() {
110+
assertMorph(receiver, String.format(MORPH, "setreplace map", "separator=\",\""),
111+
i -> {
112+
i.startRecord("1");
113+
i.literal("1", "gw");
114+
i.literal("1", "ry\tRyukyuIslands");
115+
i.endRecord();
116+
},
117+
o -> {
118+
o.get().startRecord("1");
119+
o.get().literal("1", "gw");
120+
o.get().literal("1", "Southern");
121+
o.get().endRecord();
122+
}
123+
);
124+
}
125+
126+
@Test
127+
public void shouldReplaceEmptyValuesUsingFileBasedMapSetting() {
128+
assertMorph(receiver, String.format(MORPH, "setreplace map", "allowEmptyValues=\"true\""),
129+
i -> {
130+
i.startRecord("1");
131+
i.literal("1", "zz");
132+
i.endRecord();
133+
},
134+
o -> {
135+
o.get().startRecord("1");
136+
o.get().literal("1", "");
137+
o.get().endRecord();
138+
}
139+
);
140+
}
141+
142+
@Test
143+
public void shouldNotReplaceEmptyValuesUsingFileBasedMapSetting() {
144+
assertMorph(receiver, String.format(MORPH, "setreplace map", ""),
145+
i -> {
146+
i.startRecord("1");
147+
i.literal("1", "zz");
148+
i.endRecord();
149+
},
150+
o -> {
151+
o.get().startRecord("1");
152+
o.get().literal("1", "zz");
153+
o.get().endRecord();
154+
}
155+
);
156+
}
108157
}

metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,3 +378,4 @@ ykc YukonTerritory
378378
ys Yemen(People'sDemocraticRepublic)
379379
yu SerbiaandMontenegro
380380
za Zambia
381+
zz

0 commit comments

Comments
 (0)