Skip to content

Commit be0dbc8

Browse files
authored
Merge pull request #300 from metafacture/oersi-238
Flux commands for analyzing input data
2 parents cd384b9 + 172a701 commit be0dbc8

File tree

8 files changed

+511
-1
lines changed

8 files changed

+511
-1
lines changed

metafix/build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,12 @@ dependencies {
3232

3333
implementation "org.metafacture:metafacture-commons:${versions.metafacture}"
3434
implementation "org.metafacture:metafacture-flowcontrol:${versions.metafacture}"
35+
implementation "org.metafacture:metafacture-formatting:${versions.metafacture}"
3536
implementation "org.metafacture:metafacture-framework:${versions.metafacture}"
3637
implementation "org.metafacture:metafacture-io:${versions.metafacture}"
3738
implementation "org.metafacture:metafacture-javaintegration:${versions.metafacture}"
3839
implementation "org.metafacture:metafacture-mangling:${versions.metafacture}"
40+
implementation "org.metafacture:metafacture-triples:${versions.metafacture}"
3941
implementation "org.metafacture:metamorph:${versions.metafacture}"
4042

4143
testImplementation "nl.jqno.equalsverifier:equalsverifier:${versions.equalsverifier}"
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Copyright 2023 Fabian Steeg, hbz
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.metafacture.metafix;
18+
19+
import org.metafacture.framework.FluxCommand;
20+
import org.metafacture.framework.StreamReceiver;
21+
import org.metafacture.framework.annotations.Description;
22+
import org.metafacture.framework.annotations.In;
23+
import org.metafacture.framework.annotations.Out;
24+
import org.metafacture.triples.AbstractTripleSort.Compare;
25+
26+
/**
27+
* Provide a user-friendly way to list all paths available for processing in fix (see also {@link ListFixValues}).
28+
*
29+
* @author Fabian Steeg
30+
*/
31+
@Description("Lists all paths found in the input records. These paths can be used in a Fix to address fields. Options: " +
32+
"`count` (output occurence frequency of each path, sorted by highest frequency first; default: `true`), " +
33+
"`template` (for formatting the internal triple structure; default: `${o}\t|\t${s}` if count is true, else `${s}`)" +
34+
"`index` (output individual repeated subfields and array elements with index numbers instead of '*'; default: `false`)")
35+
@In(StreamReceiver.class)
36+
@Out(String.class)
37+
@FluxCommand("list-fix-paths")
38+
public class ListFixPaths extends MetafixStreamAnalyzer {
39+
40+
public ListFixPaths() {
41+
super("nothing()", Compare.PREDICATE);
42+
setIndex(false);
43+
}
44+
45+
public void setIndex(final boolean index) {
46+
getFix().setEntityMemberName(index ? Metafix.DEFAULT_ENTITY_MEMBER_NAME : "*");
47+
}
48+
49+
public boolean getIndex() {
50+
return getFix().getEntityMemberName().equals(Metafix.DEFAULT_ENTITY_MEMBER_NAME);
51+
}
52+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Copyright 2023 Fabian Steeg, hbz
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.metafacture.metafix;
18+
19+
import org.metafacture.framework.FluxCommand;
20+
import org.metafacture.framework.StreamReceiver;
21+
import org.metafacture.framework.annotations.Description;
22+
import org.metafacture.framework.annotations.In;
23+
import org.metafacture.framework.annotations.Out;
24+
import org.metafacture.triples.AbstractTripleSort.Compare;
25+
26+
/**
27+
* Provide a user-friendly way to list all values for a given path (see {@link ListFixPaths}).
28+
*
29+
* @author Fabian Steeg
30+
*/
31+
@Description("Lists all values found for the given path. The paths can be found using fix-list-paths. Options: " +
32+
"`count` (output occurence frequency of each value, sorted by highest frequency first; default: `true`)" +
33+
"`template` (for formatting the internal triple structure; default: `${o}\t|\t${s}` if count is true, else `${s}`)")
34+
@In(StreamReceiver.class)
35+
@Out(String.class)
36+
@FluxCommand("list-fix-values")
37+
public class ListFixValues extends MetafixStreamAnalyzer {
38+
39+
public ListFixValues(final String path) {
40+
super(fix(path), Compare.OBJECT);
41+
}
42+
43+
private static String fix(final String path) {
44+
return
45+
"copy_field(\"" + path + "\",\"value.$append\")\n" +
46+
"retain(\"value\")";
47+
}
48+
49+
}

metafix/src/main/java/org/metafacture/metafix/Metafix.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ public class Metafix implements StreamPipe<StreamReceiver>, Maps {
6464
public static final String VAR_START = "$[";
6565

6666
public static final Strictness DEFAULT_STRICTNESS = Strictness.PROCESS;
67+
public static final String DEFAULT_ENTITY_MEMBER_NAME = "%d";
6768

6869
public static final Map<String, String> NO_VARS = Collections.emptyMap();
6970

@@ -87,7 +88,7 @@ public class Metafix implements StreamPipe<StreamReceiver>, Maps {
8788
private Strictness strictness = DEFAULT_STRICTNESS;
8889
private String fixFile;
8990
private String recordIdentifier;
90-
private String entityMemberName = "%d";
91+
private String entityMemberName = DEFAULT_ENTITY_MEMBER_NAME;
9192
private boolean repeatedFieldsToEntities;
9293
private boolean strictnessHandlesProcessExceptions;
9394
private int entityCount;
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/*
2+
* Copyright 2023 Fabian Steeg, hbz
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.metafacture.metafix;
18+
19+
import org.metafacture.formatting.ObjectTemplate;
20+
import org.metafacture.framework.MetafactureException;
21+
import org.metafacture.framework.ObjectReceiver;
22+
import org.metafacture.framework.helpers.DefaultStreamPipe;
23+
import org.metafacture.mangling.StreamFlattener;
24+
import org.metafacture.triples.AbstractTripleSort.Compare;
25+
import org.metafacture.triples.AbstractTripleSort.Order;
26+
import org.metafacture.triples.StreamToTriples;
27+
import org.metafacture.triples.TripleCount;
28+
import org.metafacture.triples.TripleSort;
29+
30+
import java.io.FileNotFoundException;
31+
32+
/**
33+
* Superclass for Metafix-based analyzer modules based on triples (see {@link org.metafacture.framework.objects.Triple}).
34+
*
35+
* @author Fabian Steeg
36+
*/
37+
/* package-private */ class MetafixStreamAnalyzer extends DefaultStreamPipe<ObjectReceiver<String>> {
38+
39+
private static final String DEFAULT_COUNTED_TEMPLATE = "${o}\t|\t${s}";
40+
private static final String DEFAULT_UNCOUNTED_TEMPLATE = "${s}";
41+
42+
private final Metafix fix;
43+
private boolean count = true;
44+
private final Compare countBy;
45+
private String template;
46+
47+
/* package-private */ MetafixStreamAnalyzer(final String fix, final Compare countBy) {
48+
try {
49+
this.fix = new Metafix(fix);
50+
this.fix.setRepeatedFieldsToEntities(true);
51+
}
52+
catch (final FileNotFoundException e) {
53+
throw new MetafactureException(e);
54+
}
55+
this.countBy = countBy;
56+
}
57+
58+
@Override
59+
protected void onSetReceiver() {
60+
template = template != null ? template : count ? DEFAULT_COUNTED_TEMPLATE : DEFAULT_UNCOUNTED_TEMPLATE;
61+
fix
62+
.setReceiver(new StreamFlattener())
63+
.setReceiver(new StreamToTriples())
64+
.setReceiver(tripleCount())
65+
.setReceiver(tripleSort())
66+
.setReceiver(new ObjectTemplate<>(template))
67+
.setReceiver(getReceiver());
68+
}
69+
70+
private TripleCount tripleCount() {
71+
final TripleCount tripleCount = new TripleCount();
72+
tripleCount.setCountBy(countBy);
73+
return tripleCount;
74+
}
75+
76+
private TripleSort tripleSort() {
77+
final TripleSort tripleSort = new TripleSort();
78+
tripleSort.setNumeric(count);
79+
tripleSort.setBy(count ? Compare.OBJECT : Compare.SUBJECT);
80+
tripleSort.setOrder(count ? Order.DECREASING : Order.INCREASING);
81+
return tripleSort;
82+
}
83+
84+
@Override
85+
public void startRecord(final String identifier) {
86+
fix.startRecord(identifier);
87+
}
88+
89+
@Override
90+
public void endRecord() {
91+
fix.endRecord();
92+
}
93+
94+
@Override
95+
public void startEntity(final String name) {
96+
fix.startEntity(name);
97+
}
98+
99+
@Override
100+
public void endEntity() {
101+
fix.endEntity();
102+
}
103+
104+
@Override
105+
public void literal(final String name, final String value) {
106+
fix.literal(name, value);
107+
}
108+
109+
@Override
110+
protected void onCloseStream() {
111+
fix.closeStream();
112+
}
113+
114+
@Override
115+
protected void onResetStream() {
116+
fix.resetStream();
117+
}
118+
119+
public void setCount(final boolean count) {
120+
this.count = count;
121+
}
122+
123+
public boolean getCount() {
124+
return this.count;
125+
}
126+
127+
public void setTemplate(final String template) {
128+
this.template = template;
129+
}
130+
131+
public String getTemplate() {
132+
return this.template;
133+
}
134+
135+
/* package-private */ Metafix getFix() {
136+
return this.fix;
137+
}
138+
139+
}

metafix/src/main/resources/flux-commands.properties

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,5 @@
1313
# limitations under the License.
1414
#
1515
fix org.metafacture.metafix.Metafix
16+
list-fix-paths org.metafacture.metafix.ListFixPaths
17+
list-fix-values org.metafacture.metafix.ListFixValues

0 commit comments

Comments
 (0)