Skip to content

Commit d223a4e

Browse files
authored
Merge pull request #266 from metafacture/265-filemapOptions
Add `FileMap` options to `put_filemap()` Fix function.
2 parents 9ee2194 + 4a674ef commit d223a4e

File tree

10 files changed

+67
-3
lines changed

10 files changed

+67
-3
lines changed

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ nothing()
177177

178178
##### `put_filemap`
179179

180-
Defines an external map for lookup from a file.
180+
Defines an external map for lookup from a file. Maps with more than two columns are supported, but are reduced to one designated key column and one designated value column.
181181

182182
```perl
183183
put_filemap("<sourceFile>", "<mapName>", sep_char: "\t")
@@ -190,6 +190,16 @@ The separator (`sep_char`) will vary depending on the source file, e.g.:
190190
| CSV | `,` or `;` |
191191
| TSV | `\t` |
192192

193+
Options:
194+
195+
- `allow_empty_values`: Sets whether to allow empty values in the filemap or to ignore these entries. (Default: `false`)
196+
- `compression`: Sets the compression of the file.
197+
- `decompress_concatenated`: Sets whether to decompress concatenated compressed files.
198+
- `encoding`: Sets the encoding used to open the resource.
199+
- `expected_columns`: Sets the number of expected columns; lines with a different number of columns are ignored. Set to `-1` to disable the check and allow an arbitrary number of columns. (Default: `2`)
200+
- `key_column`: Defines the column to be used for keys. The index is zero-based. (Default: `0`)
201+
- `value_column`: Defines the column to be used for values. The index is zero-based. (Default: `1`)
202+
193203
##### `put_map`
194204

195205
Defines an internal map for lookup from key/value pairs.

build.gradle

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,14 @@ subprojects {
3434
ext {
3535
versions = [
3636
'ace': '1.3.3',
37+
'antlr': '3.2',
3738
'equalsverifier': '3.8.2',
3839
'jackson': '2.13.3',
3940
'jetty': '9.4.14.v20181114',
4041
'jquery': '3.3.1-1',
4142
'junit_jupiter': '5.8.2',
4243
'junit_platform': '1.4.2',
43-
'metafacture': '5.4.0',
44+
'metafacture': 'metafacture-core-5.4.1-rc1',
4445
'mockito': '2.27.0',
4546
'requirejs': '2.3.6',
4647
'slf4j': '1.7.21',

metafix-runner/build.gradle

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ dependencies {
99
implementation "org.metafacture:metafacture-json:${versions.metafacture}"
1010
implementation "org.metafacture:metafacture-runner:${versions.metafacture}"
1111
implementation "org.metafacture:metafacture-xml:${versions.metafacture}"
12+
13+
implementation('org.antlr:antlr-runtime') {
14+
version { strictly versions.antlr }
15+
}
1216
}
1317

1418
application {

metafix/src/main/java/org/metafacture/metafix/FixMethod.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,15 @@ public void apply(final Metafix metafix, final Record record, final List<String>
7575
fileMap.setSeparator(options.getOrDefault(FILEMAP_SEPARATOR_OPTION, FILEMAP_DEFAULT_SEPARATOR));
7676
fileMap.setFile(metafix.resolvePath(fileName));
7777

78+
withOption(options, "allow_empty_values", fileMap::setAllowEmptyValues, this::getBoolean);
79+
withOption(options, "compression", fileMap::setCompression);
80+
withOption(options, "decompress_concatenated", fileMap::setDecompressConcatenated, this::getBoolean);
81+
withOption(options, "encoding", fileMap::setEncoding);
82+
withOption(options, "expected_columns", fileMap::setExpectedColumns, this::getInteger);
83+
withOption(options, "ignore_pattern", fileMap::setIgnorePattern);
84+
withOption(options, "key_column", fileMap::setKeyColumn, this::getInteger);
85+
withOption(options, "value_column", fileMap::setValueColumn, this::getInteger);
86+
7887
metafix.putMap(params.size() > 1 ? params.get(1) : fileName, fileMap);
7988
}
8089
},

metafix/src/main/java/org/metafacture/metafix/api/FixFunction.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.List;
2525
import java.util.Map;
2626
import java.util.Set;
27+
import java.util.function.BiFunction;
2728
import java.util.function.Consumer;
2829
import java.util.stream.Stream;
2930

@@ -33,15 +34,23 @@ public interface FixFunction {
3334
void apply(Metafix metafix, Record record, List<String> params, Map<String, String> options);
3435

3536
default void withOption(final Map<String, String> options, final String key, final Consumer<String> consumer) {
37+
withOption(options, key, consumer, Map::get);
38+
}
39+
40+
default <T> void withOption(final Map<String, String> options, final String key, final Consumer<T> consumer, final BiFunction<Map<String, String>, String, T> function) {
3641
if (options.containsKey(key)) {
37-
consumer.accept(options.get(key));
42+
consumer.accept(function.apply(options, key));
3843
}
3944
}
4045

4146
default boolean getBoolean(final Map<String, String> options, final String key) {
4247
return Boolean.parseBoolean(options.get(key));
4348
}
4449

50+
default int getInteger(final Map<String, String> options, final String key) {
51+
return Integer.parseInt(options.get(key));
52+
}
53+
4554
default int getInteger(final List<String> params, final int index) {
4655
return Integer.parseInt(params.get(index));
4756
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"name" : "RVK (Regensburger Verbundklassifikation)",
3+
"id" : "https://d-nb.info/gnd/4449787-8"
4+
}
5+
{
6+
"name" : "ZDB-Systematik",
7+
"id" : "http://bartoc.org/en/node/18915"
8+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"name": "rvk"
3+
}
4+
{
5+
"name": "zdbs"
6+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
rvk RVK (Regensburger Verbundklassifikation) https://d-nb.info/gnd/4449787-8
2+
udc UDC (Universal Decimal Classification) https://d-nb.info/gnd/4114037-0
3+
zdbs ZDB-Systematik http://bartoc.org/en/node/18915
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
put_filemap("./mapfile.tsv", "idLookup", sep_char:"\t",key_column:"1",value_column:"2",expected_columns:"3")
2+
put_filemap("./mapfile.tsv", "nameLookup", sep_char:"\t",expected_columns:"-1")
3+
4+
lookup("name", "nameLookup")
5+
copy_field("name","id")
6+
lookup("id", "idLookup")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
FLUX_DIR + "input.json"
2+
|open-file
3+
|as-records
4+
|decode-json
5+
|fix(FLUX_DIR + "test.fix")
6+
|encode-json(prettyPrinting="true")
7+
|write(FLUX_DIR + "output-metafix.json")
8+
;

0 commit comments

Comments
 (0)