olehmberg
diff --git a/‎pom.xml‎
Lines changed: 6 additions & 0 deletions b/‎pom.xml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/main/java/de/uni_mannheim/informatik/dws/winter/model/defaultmodel/RDFRecordReader.java‎
Lines changed: 108 additions & 0 deletions b/‎src/main/java/de/uni_mannheim/informatik/dws/winter/model/defaultmodel/RDFRecordReader.java‎
Lines changed: 108 additions & 0 deletions
diff --git a/‎src/main/java/de/uni_mannheim/informatik/dws/winter/model/io/RDFMatchableReader.java‎
Lines changed: 80 additions & 0 deletions b/‎src/main/java/de/uni_mannheim/informatik/dws/winter/model/io/RDFMatchableReader.java‎
Lines changed: 80 additions & 0 deletions
@@ -129,6 +129,12 @@
 			<artifactId>commons-math3</artifactId>
 			<version>3.2</version>
 		</dependency>
+		<dependency>
+			<groupId>org.apache.jena</groupId>
+			<artifactId>apache-jena-libs</artifactId>
+			<type>pom</type>
+			<version>3.3.0</version>
+		</dependency>
 		<dependency>
 			<groupId>com.beust</groupId>
 			<artifactId>jcommander</artifactId>
 
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2017 Data and Web Science Group, University of Mannheim, Germany (http://dws.informatik.uni-mannheim.de/)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package de.uni_mannheim.informatik.dws.winter.model.defaultmodel;
+
+import java.io.File;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import de.uni_mannheim.informatik.dws.winter.model.DataSet;
+import de.uni_mannheim.informatik.dws.winter.model.io.RDFMatchableReader;
+
+/**
+ * Turns the rows delivered by {@link RDFMatchableReader} into {@link Record}s.
+ * 
+ * Row 0 is interpreted as the header: every value becomes the name of a new
+ * {@link Attribute} that is registered with the dataset. Every later row
+ * becomes one {@link Record}.
+ * 
+ * @author Oliver Lehmberg (oli@dwslab.de)
+ *
+ */
+public class RDFRecordReader extends RDFMatchableReader<Record, Attribute> {
+
+	private int idIndex = -1;
+	private Map<String, Attribute> attributeMapping;
+
+	// Record ids assigned so far. This must be instance state: a set created
+	// inside readLine would be empty on every call and could never detect a
+	// duplicate. The set lives as long as this reader instance.
+	private final Set<String> ids = new HashSet<>();
+
+	/**
+	 * Creates a reader that resolves attributes via the ids generated from the header row.
+	 * 
+	 * @param idColumnIndex
+	 * 		The index of the column that contains the ID attribute. Specify -1 if the data does not contain a unique ID attribute.
+	 */
+	public RDFRecordReader(int idColumnIndex) {
+		this.idIndex = idColumnIndex;
+	}
+
+	/**
+	 * Creates a reader that resolves attributes via an explicit mapping.
+	 * 
+	 * @param idColumnIndex
+	 * 		The index of the column that contains the ID attribute. Specify -1 if the data does not contain a unique ID attribute.
+	 * @param attributeMapping
+	 * 		Maps the position of a column (the decimal string of its 0-based index, e.g. "0") to the corresponding attribute.
+	 * 		If null, attributes are looked up in the dataset by their generated id instead.
+	 */
+	public RDFRecordReader(int idColumnIndex, Map<String, Attribute> attributeMapping) {
+		this.idIndex = idColumnIndex;
+		this.attributeMapping = attributeMapping;
+	}
+
+	/**
+	 * Processes one row: row 0 creates the attributes, every other row creates a record.
+	 * 
+	 * @param file
+	 * 		The file the row originates from; its name and absolute path are used for ids and provenance.
+	 * @param rowNumber
+	 * 		The 0-based row number; 0 denotes the header row.
+	 * @param values
+	 * 		The values of the row; entries may be null.
+	 * @param dataset
+	 * 		The dataset that receives the attributes and records.
+	 * 
+	 * @see de.uni_mannheim.informatik.dws.winter.model.io.RDFMatchableReader#readLine(java.io.File, int, java.lang.String[], de.uni_mannheim.informatik.dws.winter.model.DataSet)
+	 */
+	@Override
+	protected void readLine(File file, int rowNumber, String[] values, DataSet<Record, Attribute> dataset) {
+
+		if(rowNumber==0) {
+
+			// header row: one attribute per column, named after the header value
+			for(int i = 0; i < values.length; i++) {
+				String attributeId = String.format("%s_Col%d", file.getName(), i);
+				Attribute a = new Attribute(attributeId, file.getAbsolutePath());
+				a.setName(values[i]);
+				dataset.addAttribute(a);
+			}
+
+			return;
+		}
+
+		// fallback id, always derived from file name and row number
+		String generatedId = String.format("%s_%d", file.getName(), rowNumber);
+		String id = generatedId;
+
+		// the bounds check protects against rows that are shorter than the configured id column
+		if(idIndex>=0 && idIndex<values.length && values[idIndex]!=null) {
+			id = values[idIndex];
+
+			if(ids.contains(id)) {
+				System.err.println(String.format("Id '%s' (line %d) already exists, using '%s' instead!", id, rowNumber, generatedId));
+				id = generatedId;
+			}
+		}
+
+		// remember every id that was handed out, including generated ones
+		ids.add(id);
+
+		Record r = new Record(id, file.getAbsolutePath());
+
+		for(int i = 0; i < values.length; i++) {
+			Attribute a;
+			if(this.attributeMapping == null) {
+				String attributeId = String.format("%s_Col%d", file.getName(), i);
+				a = dataset.getAttribute(attributeId);
+			} else {
+				a = this.attributeMapping.get(Integer.toString(i));
+			}
+
+			String v = values[i];
+
+			// empty strings are stored as missing values; null values are already missing
+			if(v!=null && v.isEmpty()) {
+				v = null;
+			}
+
+			r.setValue(a, v);
+		}
+
+		dataset.add(r);
+	}
+
+}
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 Data and Web Science Group, University of Mannheim, Germany (http://dws.informatik.uni-mannheim.de/)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package de.uni_mannheim.informatik.dws.winter.model.io;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.jena.query.Query;
+import org.apache.jena.query.QueryExecution;
+import org.apache.jena.query.QueryExecutionFactory;
+import org.apache.jena.query.QueryFactory;
+import org.apache.jena.query.QuerySolution;
+import org.apache.jena.query.ResultSet;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+
+import de.uni_mannheim.informatik.dws.winter.model.DataSet;
+import de.uni_mannheim.informatik.dws.winter.model.Matchable;
+
+/**
+ * Base class for readers that load the result of a SPARQL SELECT query over
+ * an RDF file into a {@link DataSet}.
+ * 
+ * The query result is handed to {@link #readLine(File, int, String[], DataSet)}
+ * row by row: row 0 contains the names of the query's result variables
+ * (the header), every following row contains the values of one solution in
+ * the same column order.
+ * 
+ * @author Oliver Lehmberg (oli@dwslab.de)
+ *
+ */
+public abstract class RDFMatchableReader<RecordType extends Matchable, SchemaElementType extends Matchable> {
+
+	/**
+	 * Reads the RDF file, executes the SPARQL query on it and passes the header
+	 * and every solution to {@link #readLine(File, int, String[], DataSet)}.
+	 * 
+	 * @param file
+	 * 		The RDF file to load.
+	 * @param sparqlQuery
+	 * 		The SPARQL SELECT query that produces the rows.
+	 * @param dataset
+	 * 		The dataset that is passed on to readLine.
+	 * @throws IOException
+	 * 		If the file cannot be opened or closed.
+	 */
+	public void loadFromRDF(File file, String sparqlQuery, DataSet<RecordType, SchemaElementType> dataset) throws IOException {
+
+		// create an empty model
+		Model model = ModelFactory.createDefaultModel();
+
+		// read the RDF/XML file; try-with-resources releases the file handle
+		try (FileReader reader = new FileReader(file)) {
+			model.read(reader, null);
+		}
+
+		// execute the query
+		Query query = QueryFactory.create(sparqlQuery);
+		try (QueryExecution qexec = QueryExecutionFactory.create(query, model)) {
+			ResultSet results = qexec.execSelect();
+
+			// The result variables define a fixed column order for all rows.
+			// Deriving columns from the individual solutions would make the
+			// order depend on hash iteration and the row width vary.
+			String[] variables = results.getResultVars().toArray(new String[0]);
+
+			int rowNumber = 0;
+
+			// row 0 is the header; pass a copy so implementations cannot alter the column names
+			readLine(file, rowNumber++, variables.clone(), dataset);
+
+			while (results.hasNext()) {
+				QuerySolution soln = results.nextSolution();
+
+				String[] values = new String[variables.length];
+				for (int i = 0; i < variables.length; i++) {
+					// variables that are unbound in this solution (e.g. OPTIONAL) stay null
+					if (soln.contains(variables[i])) {
+						values[i] = soln.get(variables[i]).toString();
+					}
+				}
+
+				readLine(file, rowNumber++, values, dataset);
+			}
+		}
+	}
+
+	/**
+	 * Processes one row of the query result.
+	 * 
+	 * @param file
+	 * 		The file that was loaded.
+	 * @param rowNumber
+	 * 		The 0-based row number; row 0 is the header.
+	 * @param values
+	 * 		For row 0 the names of the result variables, otherwise the values of one solution; a value is null if its variable is unbound.
+	 * @param dataset
+	 * 		The dataset to fill.
+	 */
+	protected abstract void readLine(File file, int rowNumber, String[] values, DataSet<RecordType, SchemaElementType> dataset);
+
+}