Skip to content

Commit 6a78644

Browse files
committed
Merge pull request #70 from cboehme/triple-object-retriever
Added TripleObjectRetriever module
2 parents d753e54 + c660b84 commit 6a78644

File tree

2 files changed

+194
-0
lines changed

2 files changed

+194
-0
lines changed
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.pipe;
17+
18+
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
21+
22+
import org.apache.commons.io.IOUtils;
23+
import org.culturegraph.mf.exceptions.MetafactureException;
24+
import org.culturegraph.mf.framework.DefaultObjectPipe;
25+
import org.culturegraph.mf.framework.ObjectReceiver;
26+
import org.culturegraph.mf.framework.annotations.Description;
27+
import org.culturegraph.mf.framework.annotations.In;
28+
import org.culturegraph.mf.framework.annotations.Out;
29+
import org.culturegraph.mf.types.Triple;
30+
import org.culturegraph.mf.types.Triple.ObjectType;
31+
32+
/**
33+
* Uses the object value of the triple as a URL and emits a new triple
34+
* in which the object value is replaced with the contents of the resource
35+
* identified by the URL.
36+
*
37+
* @author Christoph Böhme
38+
*/
39+
@Description("Uses the object value of the triple as a URL and emits a new triple "
40+
+ "in which the object value is replaced with the contents of the resource "
41+
+ "identified by the URL.")
42+
@In(Triple.class)
43+
@Out(Triple.class)
44+
public final class TripleObjectRetriever
45+
extends DefaultObjectPipe<Triple, ObjectReceiver<Triple>> {
46+
47+
private String defaultEncoding = "UTF-8";
48+
49+
/**
50+
* Returns the default encoding used when no encoding is
51+
* provided by the server. The default setting is UTF-8.
52+
*
53+
* @return current default setting
54+
*/
55+
public String getDefaultEncoding() {
56+
return defaultEncoding;
57+
}
58+
59+
/**
60+
* Sets the default encoding to use when no encoding is
61+
* provided by the server. The default setting is UTF-8.
62+
*
63+
* @param defaultEncoding new default encoding
64+
*/
65+
public void setDefaultEncoding(final String defaultEncoding) {
66+
this.defaultEncoding = defaultEncoding;
67+
}
68+
69+
@Override
70+
public void process(final Triple triple) {
71+
assert !isClosed();
72+
73+
if (triple.getObjectType() != ObjectType.STRING) {
74+
return;
75+
}
76+
77+
final String objectValue;
78+
try {
79+
final URL url = new URL(triple.getObject());
80+
final URLConnection con = url.openConnection();
81+
String enc = con.getContentEncoding();
82+
if (enc == null) {
83+
enc = defaultEncoding;
84+
}
85+
objectValue = IOUtils.toString(con.getInputStream(), enc);
86+
} catch (IOException e) {
87+
throw new MetafactureException(e);
88+
}
89+
90+
getReceiver().process(new Triple(triple.getSubject(), triple.getPredicate(), objectValue));
91+
}
92+
93+
}
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.pipe;
17+
18+
import static org.mockito.Mockito.verify;
19+
import static org.mockito.Mockito.verifyZeroInteractions;
20+
21+
import java.io.File;
22+
import java.io.FileWriter;
23+
import java.io.IOException;
24+
import java.io.Writer;
25+
26+
import org.apache.commons.io.IOUtils;
27+
import org.culturegraph.mf.framework.ObjectReceiver;
28+
import org.culturegraph.mf.types.Triple;
29+
import org.culturegraph.mf.types.Triple.ObjectType;
30+
import org.junit.After;
31+
import org.junit.Before;
32+
import org.junit.Rule;
33+
import org.junit.Test;
34+
import org.junit.rules.TemporaryFolder;
35+
import org.mockito.Mock;
36+
import org.mockito.MockitoAnnotations;
37+
38+
/**
39+
* Tests for {@link TripleObjectRetriever}.
40+
*
41+
* @author Christoph Böhme
42+
*/
43+
public final class TripleObjectRetrieverTest {
44+
45+
private static final String SUBJECT = "subject";
46+
private static final String PREDICATE = "predicate";
47+
private static final String OBJECT_VALUE = "object-data";
48+
private static final String ENTITY = "{l=v}";
49+
50+
private TripleObjectRetriever tripleObjectRetriever;
51+
52+
@Mock
53+
private ObjectReceiver<Triple> receiver;
54+
55+
// NO CHECKSTYLE VisibilityModifier|DeclarationOrder FOR 3 LINES:
56+
// JUnit requires rules to be public
57+
@Rule
58+
public TemporaryFolder tempFolder = new TemporaryFolder();
59+
60+
private String objectUrl;
61+
62+
@Before
63+
public void setup() throws IOException {
64+
MockitoAnnotations.initMocks(this);
65+
66+
tripleObjectRetriever = new TripleObjectRetriever();
67+
tripleObjectRetriever.setReceiver(receiver);
68+
69+
objectUrl = createObjectResource(OBJECT_VALUE);
70+
}
71+
72+
@After
73+
public void cleanup() {
74+
tripleObjectRetriever.closeStream();
75+
}
76+
77+
@Test
78+
public void testShouldReplaceObjectValueWithResourceContentRetrievedFromUrl() {
79+
tripleObjectRetriever.process(new Triple(SUBJECT, PREDICATE, objectUrl));
80+
81+
verify(receiver).process(new Triple(SUBJECT, PREDICATE, OBJECT_VALUE));
82+
}
83+
84+
@Test
85+
public void testShouldSkipTriplesWithObjectTypeEntity() {
86+
tripleObjectRetriever.process(new Triple(SUBJECT, PREDICATE, ENTITY, ObjectType.ENTITY));
87+
88+
verifyZeroInteractions(receiver);
89+
}
90+
91+
private String createObjectResource(final String contents) throws IOException {
92+
final File file = tempFolder.newFile();
93+
94+
final Writer writer = new FileWriter(file);
95+
IOUtils.write(contents, writer);
96+
writer.close();
97+
98+
return file.toURI().toURL().toString();
99+
}
100+
101+
}

0 commit comments

Comments
 (0)