Skip to content

Commit eb4a2a4

Browse files
committed
Merge branch 'rdf_parser'
2 parents a306b39 + 22c26da commit eb4a2a4

File tree

8 files changed

+14390
-0
lines changed

8 files changed

+14390
-0
lines changed

pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,12 @@
129129
<artifactId>commons-math3</artifactId>
130130
<version>3.2</version>
131131
</dependency>
132+
<dependency>
133+
<groupId>org.apache.jena</groupId>
134+
<artifactId>apache-jena-libs</artifactId>
135+
<type>pom</type>
136+
<version>3.3.0</version>
137+
</dependency>
132138
<dependency>
133139
<groupId>com.beust</groupId>
134140
<artifactId>jcommander</artifactId>
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/*
2+
* Copyright (c) 2017 Data and Web Science Group, University of Mannheim, Germany (http://dws.informatik.uni-mannheim.de/)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
* See the License for the specific language governing permissions and limitations under the License.
11+
*/
12+
package de.uni_mannheim.informatik.dws.winter.model.defaultmodel;
13+
14+
import java.io.File;
15+
import java.util.HashSet;
16+
import java.util.Map;
17+
import java.util.Set;
18+
19+
import de.uni_mannheim.informatik.dws.winter.model.DataSet;
20+
import de.uni_mannheim.informatik.dws.winter.model.io.RDFMatchableReader;
21+
22+
/**
23+
* @author Oliver Lehmberg (oli@dwslab.de)
24+
*
25+
*/
26+
public class RDFRecordReader extends RDFMatchableReader<Record, Attribute> {
27+
28+
private int idIndex = -1;
29+
private Map<String, Attribute> attributeMapping;
30+
31+
/**
32+
*
33+
* @param idColumnIndex
34+
* The index of the column that contains the ID attribute. Specify -1 if the file does not contain a unique ID attribute.
35+
* @param attributeMapping
36+
* The position of a column and the corresponding attribute
37+
*/
38+
public RDFRecordReader(int idColumnIndex) {
39+
this.idIndex = idColumnIndex;
40+
}
41+
42+
public RDFRecordReader(int idColumnIndex, Map<String, Attribute> attributeMapping) {
43+
this.idIndex = idColumnIndex;
44+
this.attributeMapping = attributeMapping;
45+
}
46+
47+
/* (non-Javadoc)
48+
* @see de.uni_mannheim.informatik.wdi.model.io.CSVMatchableReader#readLine(java.lang.String[], de.uni_mannheim.informatik.wdi.model.DataSet)
49+
*/
50+
@Override
51+
protected void readLine(File file, int rowNumber, String[] values, DataSet<Record, Attribute> dataset) {
52+
53+
Set<String> ids = new HashSet<>();
54+
55+
if(rowNumber==0) {
56+
57+
for(int i = 0; i < values.length; i++) {
58+
String v = values[i];
59+
String attributeId = String.format("%s_Col%d", file.getName(), i);
60+
Attribute a = new Attribute(attributeId, file.getAbsolutePath());
61+
a.setName(v);
62+
dataset.addAttribute(a);
63+
}
64+
65+
} else {
66+
67+
String id = String.format("%s_%d", file.getName(), rowNumber);
68+
69+
if(idIndex>=0 && values[idIndex]!=null) {
70+
id = values[idIndex];
71+
72+
if(ids.contains(id)) {
73+
String replacementId = String.format("%s_%d", file.getName(), rowNumber);
74+
System.err.println(String.format("Id '%s' (line %d) already exists, using '%s' instead!", id, rowNumber, replacementId));
75+
id = replacementId;
76+
}
77+
78+
ids.add(id);
79+
}
80+
81+
Record r = new Record(id, file.getAbsolutePath());
82+
83+
for(int i = 0; i < values.length; i++) {
84+
Attribute a;
85+
if(this.attributeMapping == null){
86+
String attributeId = String.format("%s_Col%d", file.getName(), i);
87+
a = dataset.getAttribute(attributeId);
88+
}
89+
else{
90+
a = this.attributeMapping.get(Integer.toString(i));
91+
}
92+
93+
String v = values[i];
94+
95+
if(v.isEmpty()) {
96+
v = null;
97+
}
98+
99+
r.setValue(a, v);
100+
}
101+
102+
dataset.add(r);
103+
104+
}
105+
106+
}
107+
108+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* Copyright (c) 2017 Data and Web Science Group, University of Mannheim, Germany (http://dws.informatik.uni-mannheim.de/)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
* See the License for the specific language governing permissions and limitations under the License.
11+
*/
12+
package de.uni_mannheim.informatik.dws.winter.model.io;
13+
14+
import java.io.File;
15+
import java.io.FileReader;
16+
import java.io.IOException;
17+
import java.util.HashMap;
18+
import java.util.Iterator;
19+
import java.util.Map;
20+
21+
import org.apache.jena.query.Query;
22+
import org.apache.jena.query.QueryExecution;
23+
import org.apache.jena.query.QueryExecutionFactory;
24+
import org.apache.jena.query.QueryFactory;
25+
import org.apache.jena.query.QuerySolution;
26+
import org.apache.jena.query.ResultSet;
27+
import org.apache.jena.rdf.model.Model;
28+
import org.apache.jena.rdf.model.ModelFactory;
29+
30+
import de.uni_mannheim.informatik.dws.winter.model.DataSet;
31+
import de.uni_mannheim.informatik.dws.winter.model.Matchable;
32+
33+
/**
34+
* @author Oliver Lehmberg (oli@dwslab.de)
35+
*
36+
*/
37+
public abstract class RDFMatchableReader<RecordType extends Matchable, SchemaElementType extends Matchable> {
38+
39+
public void loadFromRDF(File file, String sparqlQuery, DataSet<RecordType, SchemaElementType> dataset) throws IOException {
40+
41+
42+
// create an empty model
43+
Model model = ModelFactory.createDefaultModel();
44+
45+
// read the RDF/XML file
46+
model.read(new FileReader(file), null);
47+
48+
// read data
49+
Map<String, Integer> attributes = new HashMap<>();
50+
51+
int rowNumber = 0;
52+
// execute the query
53+
Query query = QueryFactory.create(sparqlQuery);
54+
try (QueryExecution qexec = QueryExecutionFactory.create(query, model)) {
55+
ResultSet results = qexec.execSelect();
56+
for (; results.hasNext();) {
57+
QuerySolution soln = results.nextSolution();
58+
59+
Iterator<String> attributeNames = soln.varNames();
60+
while(attributeNames.hasNext()) {
61+
String att = attributeNames.next();
62+
if(!attributes.containsKey(att)) {
63+
attributes.put(att, attributes.size());
64+
}
65+
}
66+
67+
String[] values = new String[attributes.size()];
68+
for(String att : attributes.keySet()) {
69+
values[attributes.get(att)] = soln.get(att).toString();
70+
}
71+
readLine(file, rowNumber++, values, dataset);
72+
}
73+
74+
qexec.close();
75+
}
76+
}
77+
78+
protected abstract void readLine(File file, int rowNumber, String[] values, DataSet<RecordType, SchemaElementType> dataset);
79+
80+
}

0 commit comments

Comments
 (0)