Skip to content

Commit 1819aff

Browse files
committed
Add follow URL redirections (metafacture-core#415)
1 parent 2cba849 commit 1819aff

File tree

2 files changed

+82
-19
lines changed

2 files changed

+82
-19
lines changed

metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,23 @@
2828
import org.apache.jena.rdf.model.Statement;
2929
import org.apache.jena.rdf.model.StmtIterator;
3030
import org.apache.jena.riot.RDFDataMgr;
31-
import org.apache.jena.riot.RiotNotFoundException;
3231
import org.apache.jena.shared.PropertyNotFoundException;
3332
import org.slf4j.Logger;
3433
import org.slf4j.LoggerFactory;
3534

35+
import java.io.IOException;
36+
import java.net.HttpURLConnection;
37+
import java.net.URL;
38+
import java.net.URLConnection;
3639
import java.util.ArrayList;
3740
import java.util.Collections;
3841
import java.util.HashMap;
3942
import java.util.Map;
4043
import java.util.NoSuchElementException;
41-
import java.util.Set;
4244

4345
/**
4446
* Provides a dynamically built {@link Map} based on an RDF resource. Can be one file or a comma-separated list of RDF
45-
* files or an HTTP(S) URI.
47+
* files or an HTTP(S) URI. Redirections of HTTP(S) URIs are followed.
4648
* The resources are supposed to be UTF-8 encoded.
4749
* <p>
4850
*
@@ -57,6 +59,10 @@ public final class RdfMap extends AbstractReadOnlyMap<String, String> {
5759
private static String targetLanguage = "";
5860
private static String target;
5961
private static final Logger LOG = LoggerFactory.getLogger(RdfMap.class);
62+
// Maximum number of HTTP requests issued while following a chain of redirections; exceeding it is an error.
private static final int MAX_REDIRECTIONS = 10;
63+
// Exclusive lower bound of the redirect range: a status code less than or equal to this value is not a redirect.
private static final int MIN_HTTP_STATUS_CODE = 299;
64+
// Exclusive upper bound of the redirect range: a status code greater than or equal to this value is not a redirect.
private static final int MAX_HTTP_STATUS_CODE = 400;
65+
6066
private Model model;
6167
private boolean isUninitialized = true;
6268
private final ArrayList<String> filenames = new ArrayList<>();
@@ -104,15 +110,19 @@ private void loadFiles() {
104110
}
105111

106112
private void loadFile(final String file) {
113+
String f = file;
107114
try {
115+
if (file.toLowerCase().startsWith("http")) {
116+
f = read(file);
117+
}
108118
if (model == null) {
109-
model = RDFDataMgr.loadModel(file);
119+
model = RDFDataMgr.loadModel(f);
110120
}
111121
else {
112-
RDFDataMgr.read(model, file);
122+
RDFDataMgr.read(model, f);
113123
}
114124
}
115-
catch (final RiotNotFoundException e) {
125+
catch (final IOException e) {
116126
throw new FixExecutionException("rdf file: cannot read file", e);
117127
}
118128
}
@@ -140,7 +150,7 @@ public String get(final Object key) {
140150
if (isUninitialized) {
141151
init();
142152
}
143-
String ret = Maps.DEFAULT_MAP_KEY;
153+
String ret;
144154
if (map.containsKey(key.toString())) {
145155
ret = map.get(key.toString());
146156
}
@@ -160,7 +170,7 @@ public String get(final Object key) {
160170
//second try to get SUBJECT using PROPERTY and LITERAL
161171
ret = getSubjectUsingPropertyAndLiteral(key, targetProperty);
162172
//third try: get LITERAL of PREDICATE A using PREDICATE B
163-
if (ret == Maps.DEFAULT_MAP_KEY) {
173+
if (ret.equals(Maps.DEFAULT_MAP_KEY)) {
164174
ret = getLiteralOfPredicateUsingOtherPredicate(key, targetProperty);
165175
}
166176
else {
@@ -180,7 +190,7 @@ private String getLiteralOfPredicateUsingOtherPredicate(final Object key, final
180190
while (iter.hasNext()) {
181191
resource = iter.nextResource();
182192
if (resource.getProperty(targetProperty).getString().equals(key.toString())) {
183-
Statement stmt = resource.getProperty(targetProperty);
193+
Statement stmt;
184194
final StmtIterator iterProp = resource.listProperties(targetProperty);
185195
while (iterProp.hasNext()) {
186196
stmt = iterProp.nextStatement();
@@ -213,14 +223,6 @@ private String getSubjectUsingPropertyAndLiteral(final Object key, final Propert
213223
return ret;
214224
}
215225

216-
@Override
217-
public Set<String> keySet() {
218-
if (isUninitialized) {
219-
init();
220-
}
221-
return Collections.unmodifiableSet(map.keySet());
222-
}
223-
224226
/**
225227
* Sets the language of the target Property which is queried in the RDF. Valid values are defined by BCP47.
226228
* <br>
@@ -253,4 +255,48 @@ public void setTarget(final String target) {
253255
public void setDefault(final String defaultValue) {
254256
map.put(Maps.DEFAULT_MAP_KEY, defaultValue);
255257
}
258+
259+
/**
260+
* Gets a redirected URL, if any redirection takes place. Adapted predated code from org.apache.jena.rdfxml.xmlinput.JenaReader.
261+
*
262+
* @Deprecated Using newer jena version (needs java 11) this method would be obsolete.
263+
* @param url the URL to resolve
264+
* @return the (redirected) URL
265+
* @throws IOException if any IO error occurs
266+
*/
267+
private String read(final String url) throws IOException {
268+
String connectionURL = url;
269+
try {
270+
int count = 0;
271+
URLConnection conn;
272+
while (true) {
273+
final URLConnection conn2 = new URL(connectionURL).openConnection();
274+
if (!(conn2 instanceof HttpURLConnection)) {
275+
conn = conn2;
276+
break;
277+
}
278+
count += 1;
279+
if (count > MAX_REDIRECTIONS) {
280+
throw new IOException("Too many redirects followed for " + url);
281+
}
282+
final HttpURLConnection httpURLConnection = (HttpURLConnection) conn2;
283+
conn2.setRequestProperty("accept", "*/*");
284+
final int statusCode = httpURLConnection.getResponseCode();
285+
if (statusCode <= MIN_HTTP_STATUS_CODE || statusCode >= MAX_HTTP_STATUS_CODE) {
286+
conn = conn2;
287+
break;
288+
}
289+
// Redirect
290+
connectionURL = conn2.getHeaderField("Location");
291+
if (connectionURL == null || url.equals(connectionURL)) {
292+
throw new IOException("Failed to follow redirects for " + url);
293+
}
294+
}
295+
connectionURL = conn.getURL().toString();
296+
}
297+
catch (final IOException e) {
298+
throw new IOException(e);
299+
}
300+
return connectionURL;
301+
}
256302
}

metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,9 @@ public class MetafixLookupTest {
3838

3939
private static final String CSV_MAP = "src/test/resources/org/metafacture/metafix/maps/test.csv";
4040
private static final String RDF_MAP = "src/test/resources/org/metafacture/metafix/maps/test.ttl";
41+
private static final String RDF_URL = "http://purl.org/lobid/rpb";
4142
private static final String TSV_MAP = "src/test/resources/org/metafacture/metafix/maps/test.tsv";
42-
4343
private static final String LOOKUP = "lookup('title.*',";
44-
private static final String LOOKUP_IN_RDF = "lookup_in_rdf('prefLabel.*',";
4544

4645
@Mock
4746
private StreamReceiver streamReceiver;
@@ -994,6 +993,24 @@ public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicate(
994993
);
995994
}
996995

996+
@Test
997+
public void shouldLookupRdfUrlWithRedirection() {
998+
MetafixTestHelpers.assertFix(streamReceiver,
999+
Arrays.asList("lookup_rdf('prefLabel'," + " '" + RDF_URL + "', target:\"skos:prefLabel\")"
1000+
),
1001+
i -> {
1002+
i.startRecord("1");
1003+
i.literal("prefLabel", "http://purl.org/lobid/rpb#n882022");
1004+
i.endRecord();
1005+
},
1006+
o -> {
1007+
o.get().startRecord("1");
1008+
o.get().literal("prefLabel", "Presserecht");
1009+
o.get().endRecord();
1010+
}
1011+
);
1012+
}
1013+
9971014
@Test //Scenario 1:
9981015
public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicateOfSpecificLanguage() {
9991016
MetafixTestHelpers.assertFix(streamReceiver,

0 commit comments

Comments
 (0)