Skip to content

Commit 17b836d

Browse files
committed
Sort only if we count, and sort by highest frequency first
See https://gitlab.com/oersi/oersi-etl/-/issues/238
1 parent f906faf commit 17b836d

File tree

2 files changed

+78
-27
lines changed

2 files changed

+78
-27
lines changed

metafix/src/main/java/org/metafacture/metafix/MetafixListPaths.java

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,15 @@
1818

1919
import org.metafacture.formatting.ObjectTemplate;
2020
import org.metafacture.framework.ObjectReceiver;
21+
import org.metafacture.framework.Sender;
2122
import org.metafacture.framework.helpers.DefaultStreamPipe;
22-
import org.metafacture.framework.objects.Triple;
23+
import org.metafacture.mangling.DuplicateObjectFilter;
2324
import org.metafacture.mangling.StreamFlattener;
2425
import org.metafacture.triples.AbstractTripleSort.Compare;
26+
import org.metafacture.triples.AbstractTripleSort.Order;
2527
import org.metafacture.triples.StreamToTriples;
2628
import org.metafacture.triples.TripleCount;
29+
import org.metafacture.triples.TripleSort;
2730

2831
import java.io.FileNotFoundException;
2932

@@ -50,17 +53,39 @@ public MetafixListPaths() {
5053

5154
@Override
5255
protected void onSetReceiver() {
56+
fix.setEntityMemberName(index ? "%d" : "*");
57+
final StreamToTriples triples = fix
58+
.setReceiver(new StreamFlattener())
59+
.setReceiver(new StreamToTriples());
60+
(count ? counted(triples) : unique(triples))
61+
.setReceiver(getReceiver());
62+
}
63+
64+
private Sender<ObjectReceiver<String>> counted(final StreamToTriples triples) {
65+
return triples
66+
.setReceiver(tripleCount())
67+
.setReceiver(tripleSort())
68+
.setReceiver(new ObjectTemplate<>("${s}\t ${o}"));
69+
}
70+
71+
private Sender<ObjectReceiver<String>> unique(final StreamToTriples triples) {
72+
return triples
73+
.setReceiver(new ObjectTemplate<>("${p}"))
74+
.setReceiver(new DuplicateObjectFilter<>());
75+
}
76+
77+
private TripleCount tripleCount() {
5378
final TripleCount tripleCount = new TripleCount();
5479
tripleCount.setCountBy(Compare.PREDICATE);
55-
if (!index) {
56-
fix.setEntityMemberName("*");
57-
}
58-
fix
59-
.setReceiver(new StreamFlattener())
60-
.setReceiver(new StreamToTriples())
61-
.setReceiver(tripleCount)
62-
.setReceiver(new ObjectTemplate<Triple>(count ? "${s}\t ${o}" : "${s}"))
63-
.setReceiver(getReceiver());
80+
return tripleCount;
81+
}
82+
83+
private TripleSort tripleSort() {
84+
final TripleSort tripleSort = new TripleSort();
85+
tripleSort.setNumeric(true);
86+
tripleSort.setBy(Compare.OBJECT);
87+
tripleSort.setOrder(Order.DECREASING);
88+
return tripleSort;
6489
}
6590

6691
@Override

metafix/src/test/java/org/metafacture/metafix/MetafixListPathsTest.java

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.metafacture.framework.ObjectReceiver;
2020

2121
import org.junit.jupiter.api.Test;
22+
import org.mockito.InOrder;
2223
import org.mockito.Mock;
2324
import org.mockito.Mockito;
2425
import org.mockito.MockitoAnnotations;
@@ -44,49 +45,74 @@ public MetafixListPathsTest() {
4445

4546
@Test
4647
public void testShouldListPaths() {
47-
processRecord();
48-
verify("a.*\t 3");
48+
verify(
49+
"c.*\t 3",
50+
"b.*\t 2",
51+
"a\t 1");
4952
}
5053

5154
@Test
5255
public void testShouldListPathsNoCount() {
5356
lister.setCount(false);
54-
processRecord();
55-
verify("a.*");
57+
verify(
58+
"a",
59+
"b.*",
60+
"c.*");
5661
}
5762

5863
@Test
5964
public void testShouldListPathsUseIndex() {
6065
lister.setIndex(true);
61-
processRecord();
62-
verify("a.1\t 1");
63-
verify("a.2\t 1");
64-
verify("a.3\t 1");
66+
verify(
67+
"a\t 1",
68+
"b.1\t 1",
69+
"b.2\t 1",
70+
"c.1\t 1",
71+
"c.2\t 1",
72+
"c.3\t 1");
6573
}
6674

6775
@Test
6876
public void testShouldListPathsNoCountUseIndex() {
6977
lister.setCount(false);
7078
lister.setIndex(true);
71-
processRecord();
72-
verify("a.1");
73-
verify("a.2");
74-
verify("a.3");
79+
verify(
80+
"a",
81+
"b.1",
82+
"b.2",
83+
"c.1",
84+
"c.2",
85+
"c.3");
86+
}
87+
88+
@Test
89+
public void testShouldListPathsSortedByFrequency() {
90+
verify(
91+
"c.*\t 3",
92+
"b.*\t 2",
93+
"a\t 1");
7594
}
7695

7796
private void processRecord() {
7897
lister.setReceiver(receiver);
7998
lister.startRecord("");
80-
lister.literal("a", "A");
81-
lister.literal("a", "B");
82-
lister.literal("a", "C");
99+
lister.literal("a", "");
100+
lister.literal("b", "");
101+
lister.literal("b", "");
102+
lister.literal("c", "");
103+
lister.literal("c", "");
104+
lister.literal("c", "");
83105
lister.endRecord();
84106
lister.closeStream();
85107
}
86108

87-
private void verify(final String result) throws MockitoAssertionError {
109+
private void verify(final String... result) throws MockitoAssertionError {
110+
processRecord();
88111
try {
89-
Mockito.verify(receiver).process(result);
112+
final InOrder ordered = Mockito.inOrder(receiver);
113+
for (final String r : result) {
114+
ordered.verify(receiver).process(r);
115+
}
90116
}
91117
catch (final MockitoAssertionError e) {
92118
System.out.println(Mockito.mockingDetails(receiver).printInvocations());

0 commit comments

Comments
 (0)