Skip to content

Commit 9f397c7

Browse files
author
mgeipel
committed
Merge branch 'master' of ssh://github.com/culturegraph/metafacture-core
2 parents 235c8aa + f4c9e35 commit 9f397c7

35 files changed

+1737
-126
lines changed

.classpath

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@
2828
<attribute name="maven.pomderived" value="true"/>
2929
</attributes>
3030
</classpathentry>
31-
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6">
31+
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6">
3232
<attributes>
3333
<attribute name="maven.pomderived" value="true"/>
3434
</attributes>
3535
</classpathentry>
36-
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
36+
<classpathentry exported="true" kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
3737
<attributes>
3838
<attribute name="maven.pomderived" value="true"/>
3939
</attributes>
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Copyright 2013 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package net.b3e.mf.extra.pipe;
17+
18+
import java.io.FileInputStream;
19+
import java.io.IOException;
20+
import java.io.InputStream;
21+
import java.security.MessageDigest;
22+
23+
import net.b3e.mf.extra.util.DigestAlgorithm;
24+
25+
import org.culturegraph.mf.exceptions.MetafactureException;
26+
import org.culturegraph.mf.framework.DefaultObjectPipe;
27+
import org.culturegraph.mf.framework.ObjectReceiver;
28+
import org.culturegraph.mf.framework.annotations.Description;
29+
import org.culturegraph.mf.framework.annotations.In;
30+
import org.culturegraph.mf.framework.annotations.Out;
31+
import org.culturegraph.mf.types.Triple;
32+
33+
/**
34+
* Uses the input string as a file name and computes a cryptographic hash the file.
35+
*
36+
* @author Christoph Böhme
37+
*
38+
*/
39+
@Description("Uses the input string as a file name and computes a cryptographic hash the file")
40+
@In(String.class)
41+
@Out(Triple.class)
42+
public final class FileDigestCalculator extends
43+
DefaultObjectPipe<String, ObjectReceiver<Triple>> {
44+
45+
private static final int BUFFER_SIZE = 1024;
46+
47+
private static final int HIGH_NIBBLE = 0xf0;
48+
private static final int LOW_NIBBLE = 0x0f;
49+
private static final char[] NIBBLE_TO_HEX =
50+
{ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
51+
52+
private final DigestAlgorithm algorithm;
53+
private final MessageDigest messageDigest;
54+
55+
56+
public FileDigestCalculator(final DigestAlgorithm algorithm) {
57+
this.algorithm = algorithm;
58+
this.messageDigest = this.algorithm.getInstance();
59+
}
60+
61+
public FileDigestCalculator(final String algorithm) {
62+
this.algorithm = DigestAlgorithm.valueOf(algorithm.toUpperCase());
63+
this.messageDigest = this.algorithm.getInstance();
64+
}
65+
66+
@Override
67+
public void process(final String file) {
68+
final String digest;
69+
try (final InputStream stream = new FileInputStream(file)) {
70+
digest = bytesToHex(getDigest(stream, messageDigest));
71+
} catch (IOException e) {
72+
throw new MetafactureException(e);
73+
}
74+
getReceiver().process(new Triple(file, algorithm.name(), digest));
75+
}
76+
77+
private static byte[] getDigest(final InputStream stream, final MessageDigest messageDigest) throws IOException {
78+
final byte[] buffer = new byte[BUFFER_SIZE];
79+
80+
int read = stream.read(buffer, 0, BUFFER_SIZE);
81+
while (read > -1) {
82+
messageDigest.update(buffer, 0, read);
83+
read = stream.read(buffer, 0, BUFFER_SIZE);
84+
}
85+
return messageDigest.digest();
86+
}
87+
88+
private static String bytesToHex(final byte[] bytes) {
89+
final char[] hex = new char[bytes.length * 2];
90+
for (int i=0; i < bytes.length; ++i) {
91+
// NO CHECKSTYLE MagicNumber FOR 1 LINE:
92+
hex[i * 2] = NIBBLE_TO_HEX[(bytes[i] & HIGH_NIBBLE) >>> 4];
93+
hex[i * 2 + 1] = NIBBLE_TO_HEX[bytes[i] & LOW_NIBBLE];
94+
}
95+
return new String(hex);
96+
}
97+
98+
}
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*
2+
* Copyright 2013 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package net.b3e.mf.extra.source;
17+
18+
import java.io.IOException;
19+
import java.nio.file.FileVisitOption;
20+
import java.nio.file.FileVisitResult;
21+
import java.nio.file.Files;
22+
import java.nio.file.Path;
23+
import java.nio.file.Paths;
24+
import java.nio.file.SimpleFileVisitor;
25+
import java.nio.file.attribute.BasicFileAttributes;
26+
import java.util.EnumSet;
27+
import java.util.Set;
28+
29+
import org.culturegraph.mf.exceptions.MetafactureException;
30+
import org.culturegraph.mf.framework.DefaultObjectPipe;
31+
import org.culturegraph.mf.framework.ObjectReceiver;
32+
import org.culturegraph.mf.framework.annotations.Description;
33+
import org.culturegraph.mf.framework.annotations.In;
34+
import org.culturegraph.mf.framework.annotations.Out;
35+
import org.slf4j.Logger;
36+
import org.slf4j.LoggerFactory;
37+
38+
/**
39+
* Walks through a file tree tree and emits the name of
40+
* each file found.
41+
*
42+
* @author Christoph Böhme
43+
*
44+
*/
45+
@Description("Walks through a file tree and emits the name of each file found.")
46+
@In(String.class)
47+
@Out(String.class)
48+
public final class FileTreeWalker extends
49+
DefaultObjectPipe<String, ObjectReceiver<String>> {
50+
51+
private static final Logger LOG = LoggerFactory.getLogger(FileTreeWalker.class);
52+
53+
private final Visitor visitor = new Visitor();
54+
private final Set<FileVisitOption> visitOptions = EnumSet.noneOf(FileVisitOption.class);
55+
56+
private int maxDepth = Integer.MAX_VALUE;
57+
58+
/**
59+
* Returns whether symbolic links are followed.
60+
*
61+
* @return true if symbolic links are followed
62+
*/
63+
public boolean isFollowingLinks() {
64+
return visitOptions.contains(FileVisitOption.FOLLOW_LINKS);
65+
}
66+
67+
/**
68+
* Configures whether to follow symbolic links or not
69+
*
70+
* @param follow if true symbolic links are followed
71+
*/
72+
public void setFollowLinks(final boolean follow) {
73+
if (follow) {
74+
visitOptions.add(FileVisitOption.FOLLOW_LINKS);
75+
} else {
76+
visitOptions.remove(FileVisitOption.FOLLOW_LINKS);
77+
}
78+
}
79+
80+
/**
81+
* Returns the maximum depth to which the walker will descend
82+
* in the directory hierarchy.
83+
*
84+
* @return max visitation depth
85+
*/
86+
public int getMaxDepth() {
87+
return maxDepth;
88+
}
89+
90+
/**
91+
* Sets the maximum depth to which the walker should descend
92+
* in the directory hierarchy.
93+
*
94+
* @param maxDepth sets the visitation depth. 0 means only
95+
* visiting the start node. Integer.MAX_VALUE
96+
* means descend as deep as possible.
97+
*/
98+
public void setMaxDepth(final int maxDepth) {
99+
this.maxDepth = maxDepth;
100+
}
101+
102+
@Override
103+
public void process(final String directory) {
104+
try {
105+
Files.walkFileTree(Paths.get(directory), visitOptions, maxDepth, visitor);
106+
} catch (IOException e) {
107+
throw new MetafactureException(e);
108+
}
109+
}
110+
111+
/**
112+
* Visitor implementation
113+
*/
114+
private class Visitor extends SimpleFileVisitor<Path> {
115+
116+
@Override
117+
public FileVisitResult visitFile(final Path file,
118+
final BasicFileAttributes attrs) {
119+
if (attrs.isRegularFile()) {
120+
getReceiver().process(file.toAbsolutePath().toString());
121+
}
122+
return FileVisitResult.CONTINUE;
123+
}
124+
125+
@Override
126+
public FileVisitResult visitFileFailed(final Path file, final IOException exc) {
127+
LOG.warn("Failed visiting directory/file '{}': {}", file.toAbsolutePath().toString(), exc.toString());
128+
return FileVisitResult.CONTINUE;
129+
}
130+
131+
@Override
132+
public FileVisitResult postVisitDirectory(final Path dir, final IOException exc) {
133+
if (exc != null) {
134+
LOG.warn("Aborted directory visit '{}': {}", dir.toAbsolutePath().toString(), exc.toString());
135+
}
136+
return FileVisitResult.CONTINUE;
137+
138+
}
139+
140+
}
141+
142+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Copyright 2013 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package net.b3e.mf.extra.util;
18+
19+
import java.security.MessageDigest;
20+
import java.security.NoSuchAlgorithmException;
21+
22+
import org.culturegraph.mf.exceptions.MetafactureException;
23+
24+
/**
25+
* Message digests which can be used by modules.
26+
*
27+
* @author Christoph Böhme
28+
*/
29+
public enum DigestAlgorithm {
30+
31+
MD2("MD2"),
32+
MD5("MD5"),
33+
SHA1("SHA-1"),
34+
SHA256("SHA-256"),
35+
SHA384("SHA-384"),
36+
SHA512 ("SHA-512");
37+
38+
private final String identifier;
39+
40+
private DigestAlgorithm(final String identifier) {
41+
this.identifier = identifier;
42+
}
43+
44+
public MessageDigest getInstance() {
45+
try {
46+
return MessageDigest.getInstance(identifier);
47+
} catch (NoSuchAlgorithmException e) {
48+
throw new MetafactureException (e);
49+
}
50+
}
51+
52+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
default in = FLUX_DIR + "filedigest.flux";
2+
3+
in
4+
|digest-file("md5")
5+
|write("stdout");
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
default in = ".";
2+
3+
in
4+
|walk-filetree
5+
|write("stdout");
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
walk-filetree = net.b3e.mf.extra.source.FileTreeWalker
2+
digest-file = net.b3e.mf.extra.pipe.FileDigestCalculator

quality_assurance/checkstyle_rules.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@
116116
</module>
117117
<module name="Header">
118118
<property name="headerFile" value="${basedir}/quality_assurance/java-header.txt"/>
119+
<!-- Ignore copyright line as it may contain varying names: -->
120+
<property name="ignoreLines" value="2"/>
119121
<property name="fileExtensions" value="java"/>
120122
</module>
121123
<module name="SuppressionFilter">

src/main/java/org/culturegraph/mf/morph/collectors/Range.java

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616
package org.culturegraph.mf.morph.collectors;
1717

18+
import java.util.Comparator;
1819
import java.util.SortedSet;
1920
import java.util.TreeSet;
2021

@@ -28,15 +29,37 @@
2829
* @author Christoph Böhme
2930
*/
3031
public final class Range extends AbstractCollect {
31-
private final SortedSet<Integer> values = new TreeSet<Integer>();
32+
private final SortedSet<Integer> values = new TreeSet<Integer>(new IncrementDependingComparator());
3233

34+
private int increment;
3335
private Integer first;
3436

37+
/**
38+
* A comparator which defines the sort order of the values in the range
39+
* depending on the increment.
40+
*/
41+
private class IncrementDependingComparator implements Comparator<Integer> {
42+
43+
@Override
44+
public int compare(final Integer o1, final Integer o2) {
45+
return Integer.signum(increment) * (o1 - o2);
46+
}
47+
48+
}
49+
3550
public Range(final Metamorph metamorph) {
3651
super(metamorph);
3752
setNamedValueReceiver(metamorph);
3853
}
3954

55+
public int getIncrement() {
56+
return increment;
57+
}
58+
59+
public void setIncrement(final int increment) {
60+
this.increment = increment;
61+
}
62+
4063
@Override
4164
protected void emit() {
4265
for (final Integer i: values) {
@@ -55,7 +78,7 @@ protected void receive(final String name, final String value, final NamedValueSo
5578
first = Integer.valueOf(value);
5679
} else {
5780
final int last = Integer.valueOf(value).intValue();
58-
for (int i = first.intValue(); i <= last; ++i) {
81+
for (int i = first.intValue(); (increment > 0 && i <= last) || (increment < 0 && i >= last); i += increment) {
5982
values.add(Integer.valueOf(i));
6083
}
6184
first = null;

0 commit comments

Comments
 (0)