Skip to content

Commit cbe60aa

Browse files
committed
Merge pull request #69 from cboehme/triple-object-writer
Added TripleObjectWriter module to write the object values of triples into files.
2 parents 6a78644 + abdbd83 commit cbe60aa

File tree

2 files changed

+194
-0
lines changed

2 files changed

+194
-0
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.sink;
17+
18+
import java.io.File;
19+
import java.io.FileOutputStream;
20+
import java.io.IOException;
21+
import java.io.OutputStreamWriter;
22+
import java.io.Writer;
23+
24+
import org.apache.commons.io.FilenameUtils;
25+
import org.apache.commons.io.IOUtils;
26+
import org.culturegraph.mf.exceptions.MetafactureException;
27+
import org.culturegraph.mf.framework.DefaultObjectReceiver;
28+
import org.culturegraph.mf.framework.annotations.Description;
29+
import org.culturegraph.mf.framework.annotations.In;
30+
import org.culturegraph.mf.types.Triple;
31+
32+
/**
33+
* Writes the object value of the triple into a file. The filename
34+
* is constructed from subject and predicate.
35+
*
36+
* Please note: This module does not check if the filename constructed
37+
* from subject and predicate stays within {@code baseDir}. THIS MODULE
38+
* SHOULD NOT BE USED IN ENVIRONMENTS IN WHICH THE VALUES OF SUBJECT AND
39+
* PREDICATE A PROVIDED BY AN UNTRUSTED SOURCE!
40+
*
41+
* @author Christoph Böhme
42+
*/
43+
@Description("Writes the object value of the triple into a file. The filename is "
44+
+ "constructed from subject and predicate. Please note: This module does "
45+
+ "not check if the filename constructed from subject and predicate stays "
46+
+ "within `baseDir`. THIS MODULE SHOULD NOT BE USED IN ENVIRONMENTS IN WHICH "
47+
+ "THE VALUES OF SUBJECT AND PREDICATE A PROVIDED BY AN UNTRUSTED SOURCE!")
48+
@In(Triple.class)
49+
public final class TripleObjectWriter extends DefaultObjectReceiver<Triple> {
50+
51+
private final String baseDir;
52+
53+
private String encoding = "UTF-8";
54+
55+
public TripleObjectWriter(final String baseDir) {
56+
this.baseDir = baseDir;
57+
}
58+
59+
/**
60+
* Returns the encoding used to open the resource.
61+
*
62+
* @return current default setting
63+
*/
64+
public String getEncoding() {
65+
return encoding;
66+
}
67+
68+
/**
69+
* Sets the encoding used to open the resource.
70+
*
71+
* @param encoding
72+
* new encoding
73+
*/
74+
public void setEncoding(final String encoding) {
75+
this.encoding = encoding;
76+
}
77+
78+
@Override
79+
public void process(final Triple triple) {
80+
final String file = FilenameUtils.concat(
81+
FilenameUtils.concat(baseDir, triple.getSubject()), triple.getPredicate());
82+
83+
ensurePathExists(file);
84+
85+
try {
86+
final Writer writer = new OutputStreamWriter(new FileOutputStream(file), encoding);
87+
IOUtils.write(triple.getObject(), writer);
88+
writer.close();
89+
} catch (IOException e) {
90+
throw new MetafactureException(e);
91+
}
92+
}
93+
94+
private void ensurePathExists(final String path) {
95+
final File parent = new File(path).getAbsoluteFile().getParentFile();
96+
parent.mkdirs();
97+
}
98+
99+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.sink;
17+
18+
import static org.junit.Assert.assertEquals;
19+
20+
import java.io.File;
21+
import java.io.FileInputStream;
22+
import java.io.IOException;
23+
import java.io.InputStream;
24+
25+
import org.apache.commons.io.IOUtils;
26+
import org.culturegraph.mf.types.Triple;
27+
import org.junit.After;
28+
import org.junit.Before;
29+
import org.junit.Rule;
30+
import org.junit.Test;
31+
import org.junit.rules.TemporaryFolder;
32+
33+
/**
34+
* Tests for {@link TripleObjectWriter}.
35+
*
36+
* @author Christoph Böhme
37+
*/
38+
public final class TripleObjectWriterTest {
39+
40+
private static final String SUBJECT1 = "subject1";
41+
private static final String SUBJECT2 = "subject2";
42+
private static final String STRUCTURED_SUBJECT_A = "a";
43+
private static final String STRUCTURED_SUBJECT_B = "b";
44+
private static final String STRUCTURED_SUBJECT = STRUCTURED_SUBJECT_A + "/" + STRUCTURED_SUBJECT_B;
45+
private static final String PREDICATE = "predicate";
46+
private static final String OBJECT1 = "object-data 1";
47+
private static final String OBJECT2 = "object-data 2";
48+
49+
private TripleObjectWriter tripleObjectWriter;
50+
51+
// NO CHECKSTYLE VisibilityModifier|DeclarationOrder FOR 3 LINES:
52+
// JUnit requires rules to be public
53+
@Rule
54+
public TemporaryFolder tempFolder = new TemporaryFolder();
55+
56+
private String baseDir;
57+
58+
@Before
59+
public void setup() throws IOException {
60+
baseDir = tempFolder.newFolder().getAbsolutePath();
61+
tripleObjectWriter = new TripleObjectWriter(baseDir);
62+
}
63+
64+
@After
65+
public void cleanup() {
66+
tripleObjectWriter.closeStream();
67+
}
68+
69+
@Test
70+
public void testShouldWriteObjectOfTripleIntoFile() throws IOException {
71+
tripleObjectWriter.process(new Triple(SUBJECT1, PREDICATE, OBJECT1));
72+
tripleObjectWriter.process(new Triple(SUBJECT2, PREDICATE, OBJECT2));
73+
74+
final String filename1 = baseDir + File.separator + SUBJECT1 + File.separator + PREDICATE;
75+
final String filename2 = baseDir + File.separator + SUBJECT2 + File.separator + PREDICATE;
76+
assertEquals(get(filename1), OBJECT1);
77+
assertEquals(get(filename2), OBJECT2);
78+
}
79+
80+
@Test
81+
public void testShouldMapStructuredSubjectsToDirectories() throws IOException {
82+
tripleObjectWriter.process(new Triple(STRUCTURED_SUBJECT, PREDICATE, OBJECT1));
83+
84+
final String filename = baseDir
85+
+ File.separator + STRUCTURED_SUBJECT_A + File.separator + STRUCTURED_SUBJECT_B
86+
+ File.separator + PREDICATE;
87+
assertEquals(get(filename), OBJECT1);
88+
}
89+
90+
private String get(final String filename) throws IOException {
91+
final InputStream stream = new FileInputStream(filename);
92+
return IOUtils.toString(stream, tripleObjectWriter.getEncoding());
93+
}
94+
95+
}

0 commit comments

Comments
 (0)