Skip to content

Commit 4f2cebe

Browse files
committed
Added modules from Xoff/metafacture-extra.
Modules: - FileDigestCalculator - TripleReorder Merge remote-tracking branch 'mf-extra/move-to-core' into adding-modules-from-mf-core Conflicts: src/main/resources/flux-commands.properties
2 parents 2b1d440 + 3e4f6ab commit 4f2cebe

File tree

4 files changed

+247
-0
lines changed

4 files changed

+247
-0
lines changed
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/*
2+
* Copyright 2013 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.pipe;
17+
18+
import java.io.FileInputStream;
19+
import java.io.IOException;
20+
import java.io.InputStream;
21+
import java.security.MessageDigest;
22+
23+
24+
import org.culturegraph.mf.exceptions.MetafactureException;
25+
import org.culturegraph.mf.framework.DefaultObjectPipe;
26+
import org.culturegraph.mf.framework.ObjectReceiver;
27+
import org.culturegraph.mf.framework.annotations.Description;
28+
import org.culturegraph.mf.framework.annotations.In;
29+
import org.culturegraph.mf.framework.annotations.Out;
30+
import org.culturegraph.mf.stream.util.DigestAlgorithm;
31+
import org.culturegraph.mf.types.Triple;
32+
33+
/**
34+
* Uses the input string as a file name and computes a cryptographic hash the file.
35+
*
36+
* @author Christoph Böhme
37+
*
38+
*/
39+
@Description("Uses the input string as a file name and computes a cryptographic hash the file")
40+
@In(String.class)
41+
@Out(Triple.class)
42+
public final class FileDigestCalculator extends
43+
DefaultObjectPipe<String, ObjectReceiver<Triple>> {
44+
45+
private static final int BUFFER_SIZE = 1024;
46+
47+
private static final int HIGH_NIBBLE = 0xf0;
48+
private static final int LOW_NIBBLE = 0x0f;
49+
private static final char[] NIBBLE_TO_HEX =
50+
{ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
51+
52+
private final DigestAlgorithm algorithm;
53+
private final MessageDigest messageDigest;
54+
55+
56+
public FileDigestCalculator(final DigestAlgorithm algorithm) {
57+
this.algorithm = algorithm;
58+
this.messageDigest = this.algorithm.getInstance();
59+
}
60+
61+
public FileDigestCalculator(final String algorithm) {
62+
this.algorithm = DigestAlgorithm.valueOf(algorithm.toUpperCase());
63+
this.messageDigest = this.algorithm.getInstance();
64+
}
65+
66+
@Override
67+
public void process(final String file) {
68+
final String digest;
69+
InputStream stream = null;
70+
try {
71+
stream = new FileInputStream(file);
72+
digest = bytesToHex(getDigest(stream, messageDigest));
73+
} catch (IOException e) {
74+
throw new MetafactureException(e);
75+
} finally {
76+
if (stream != null) {
77+
try { stream.close(); }
78+
catch (final IOException e) { }
79+
}
80+
}
81+
getReceiver().process(new Triple(file, algorithm.name(), digest));
82+
}
83+
84+
private static byte[] getDigest(final InputStream stream, final MessageDigest messageDigest) throws IOException {
85+
final byte[] buffer = new byte[BUFFER_SIZE];
86+
87+
int read = stream.read(buffer, 0, BUFFER_SIZE);
88+
while (read > -1) {
89+
messageDigest.update(buffer, 0, read);
90+
read = stream.read(buffer, 0, BUFFER_SIZE);
91+
}
92+
return messageDigest.digest();
93+
}
94+
95+
private static String bytesToHex(final byte[] bytes) {
96+
final char[] hex = new char[bytes.length * 2];
97+
for (int i=0; i < bytes.length; ++i) {
98+
// NO CHECKSTYLE MagicNumber FOR 1 LINE:
99+
hex[i * 2] = NIBBLE_TO_HEX[(bytes[i] & HIGH_NIBBLE) >>> 4];
100+
hex[i * 2 + 1] = NIBBLE_TO_HEX[bytes[i] & LOW_NIBBLE];
101+
}
102+
return new String(hex);
103+
}
104+
105+
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* Copyright 2013 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.culturegraph.mf.stream.pipe;
18+
19+
import org.culturegraph.mf.framework.DefaultObjectPipe;
20+
import org.culturegraph.mf.framework.ObjectReceiver;
21+
import org.culturegraph.mf.framework.annotations.Description;
22+
import org.culturegraph.mf.framework.annotations.In;
23+
import org.culturegraph.mf.framework.annotations.Out;
24+
import org.culturegraph.mf.types.Triple;
25+
26+
/**
27+
* Shifts subjectTo predicateTo and object around.
28+
*
29+
* @author Christoph Böhme
30+
*
31+
*/
32+
@Description("Shifts subjectTo predicateTo and object around")
33+
@In(Triple.class)
34+
@Out(Triple.class)
35+
public final class TripleReorder extends
36+
DefaultObjectPipe<Triple, ObjectReceiver<Triple>> {
37+
38+
/**
39+
* Names of the elements in the triple
40+
*/
41+
public enum TripleElement { SUBJECT, PREDICATE, OBJECT };
42+
// Do not change the item order because the process method
43+
// uses ordinal().
44+
45+
private TripleElement subjectFrom = TripleElement.SUBJECT;
46+
private TripleElement predicateFrom = TripleElement.PREDICATE;
47+
private TripleElement objectFrom = TripleElement.OBJECT;
48+
49+
public TripleElement getSubjectFrom() {
50+
return subjectFrom;
51+
}
52+
53+
public TripleElement getPredicateFrom() {
54+
return predicateFrom;
55+
}
56+
57+
public TripleElement getObjectFrom() {
58+
return objectFrom;
59+
}
60+
61+
public void setSubjectFrom(final TripleElement subjectFrom) {
62+
this.subjectFrom = subjectFrom;
63+
}
64+
65+
public void setPredicateFrom(final TripleElement predicateFrom) {
66+
this.predicateFrom = predicateFrom;
67+
}
68+
69+
public void setObjectFrom(final TripleElement objectFrom) {
70+
this.objectFrom = objectFrom;
71+
}
72+
73+
@Override
74+
public void process(final Triple triple) {
75+
final String[] elements = {
76+
triple.getSubject(),
77+
triple.getPredicate(),
78+
triple.getObject(),
79+
};
80+
81+
getReceiver().process(new Triple(
82+
elements[subjectFrom.ordinal()],
83+
elements[predicateFrom.ordinal()],
84+
elements[objectFrom.ordinal()]
85+
));
86+
}
87+
88+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Copyright 2013 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.culturegraph.mf.stream.util;
18+
19+
import java.security.MessageDigest;
20+
import java.security.NoSuchAlgorithmException;
21+
22+
import org.culturegraph.mf.exceptions.MetafactureException;
23+
24+
/**
25+
* Message digests which can be used by modules.
26+
*
27+
* @author Christoph Böhme
28+
*/
29+
public enum DigestAlgorithm {
30+
31+
MD2("MD2"),
32+
MD5("MD5"),
33+
SHA1("SHA-1"),
34+
SHA256("SHA-256"),
35+
SHA384("SHA-384"),
36+
SHA512 ("SHA-512");
37+
38+
private final String identifier;
39+
40+
private DigestAlgorithm(final String identifier) {
41+
this.identifier = identifier;
42+
}
43+
44+
public MessageDigest getInstance() {
45+
try {
46+
return MessageDigest.getInstance(identifier);
47+
} catch (NoSuchAlgorithmException e) {
48+
throw new MetafactureException (e);
49+
}
50+
}
51+
52+
}

src/main/resources/flux-commands.properties

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,5 @@ normalize-utf8 org.culturegraph.mf.stream.pipe.Utf8Normalizer
8989
morph org.culturegraph.mf.morph.Metamorph
9090
filter org.culturegraph.mf.stream.pipe.Filter
9191
add-oreaggregation org.culturegraph.mf.stream.pipe.OreAggregationAdder
92+
digest-file org.culturegraph.mf.stream.pipe.FileDigestCalculator
93+
reorder-triple org.culturegraph.mf.stream.pipe.TripleReorder

0 commit comments

Comments
 (0)