Skip to content

Commit 3e67d2a

Browse files
author
mgeipel
committed
added missing class for metric calculator
1 parent 40e5764 commit 3e67d2a

File tree

1 file changed

+123
-0
lines changed

1 file changed

+123
-0
lines changed
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.pipe.stat;
17+
18+
import java.util.Hashtable;
19+
import java.util.Map;
20+
import java.util.regex.Pattern;
21+
22+
import org.culturegraph.mf.framework.DefaultObjectPipe;
23+
import org.culturegraph.mf.framework.ObjectReceiver;
24+
import org.culturegraph.mf.types.Triple;
25+
26+
/**
27+
* Base class for operating on count data. The expected inputs are triples
28+
* containing as subject the variable name and as object the count. Marginal
29+
* counts must appear first, joint counts second. Marginal counts must be written
30+
* as 1:VARNAME, joint counts as 2:FIRSTVARNAME&SECONDVARNAME.
31+
*
32+
* @author Markus Geipel
33+
*
34+
*/
35+
36+
public abstract class AbstractCountProcessor extends DefaultObjectPipe<Triple, ObjectReceiver<Triple>> {
37+
38+
39+
private static final Pattern KEY_SPLIT_PATTERN = Pattern.compile("&", Pattern.LITERAL);
40+
41+
private static final String MARGINAL_PREFIX = "1:";
42+
private static final String JOINT_PREFIX = "2:";
43+
44+
private final Map<String, Integer> marginals = new Hashtable<String, Integer>();
45+
private boolean inHeader = true;
46+
private int minCount;
47+
48+
protected final int getTotal() {
49+
return getMarginal("");
50+
}
51+
52+
protected final void setMinCount(final int min) {
53+
minCount = min;
54+
}
55+
56+
@Override
57+
public final void process(final Triple triple) {
58+
if (triple.getSubject().indexOf('&') == -1) {
59+
if (!inHeader) {
60+
throw new IllegalArgumentException(
61+
"Marginal counts and joint count must not be mixed. Marginal counts must appear first, joint counts second");
62+
}
63+
if (!triple.getSubject().startsWith(MARGINAL_PREFIX)) {
64+
throw new IllegalArgumentException("Marginal counts must start with '1:'");
65+
}
66+
final int marginal = Integer.parseInt(triple.getObject());
67+
if (marginal >= minCount) {
68+
69+
marginals.put(triple.getSubject().substring(2), Integer.valueOf(marginal));
70+
}
71+
72+
} else {
73+
inHeader = false;
74+
if (!triple.getSubject().startsWith(JOINT_PREFIX)) {
75+
throw new IllegalArgumentException("Joint counts must start with '2:'");
76+
}
77+
78+
final int nab = Integer.parseInt(triple.getObject());
79+
final String[] keyParts = KEY_SPLIT_PATTERN.split(triple.getSubject().substring(2));
80+
if (nab >= minCount) {
81+
82+
final int na = getMarginal(keyParts[0]);
83+
final int nb = getMarginal(keyParts[1]);
84+
processCount(keyParts[0], keyParts[1], na, nb, nab);
85+
}
86+
}
87+
}
88+
89+
protected abstract void processCount(final String varA, final String varB, final int countA, final int countB,
90+
final int countAandB);
91+
92+
private int getMarginal(final String string) {
93+
final Integer value = marginals.get(string);
94+
if(null==value){
95+
return 0;
96+
}
97+
return value.intValue();
98+
}
99+
100+
@Override
101+
protected final void onResetStream() {
102+
marginals.clear();
103+
inHeader = true;
104+
reset();
105+
}
106+
107+
protected void reset() {
108+
// nothing to do
109+
110+
}
111+
112+
@Override
113+
protected final void onCloseStream() {
114+
onResetStream();
115+
close();
116+
}
117+
118+
protected void close() {
119+
// nothing to do
120+
121+
}
122+
123+
}

0 commit comments

Comments
 (0)