Skip to content

Commit b9fe7d0

Browse files
committed
complete implementation of rdf:List to cdt:List converter
1 parent 8112198 commit b9fe7d0

File tree

7 files changed

+691
-0
lines changed

7 files changed

+691
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*.zip
1919
*.tar.gz
2020
*.rar
21+
/target/
2122

2223
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
2324
hs_err_pid*

bin/common.sh

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#!/bin/sh
2+
# This script resolves CDT_TOOLS_HOME, locates the Java binary, and sets
3+
# the classpath to point to the correct JAR file.
4+
5+
# Resolve symbolic links and print the path of the file they finally point to.
# Arguments: $1 - path of a file that may be a symbolic link (or a chain of links)
# Outputs:   the resolved path on stdout; a path that is not a symbolic link
#            is printed unchanged
resolveLink() {
  # `local` is not strictly POSIX, but dash, ash, and bash all support it.
  local NAME TARGET DIR HOPS
  NAME="$1"
  HOPS=0
  # Follow links one hop at a time until something that is not a link is reached
  while [ -L "$NAME" ]; do
    # Bound the number of hops so that cyclic links cannot hang the script
    HOPS=$((HOPS + 1))
    if [ "$HOPS" -gt 40 ]; then
      break
    fi
    case "$OSTYPE" in
      darwin*|bsd*)
        # BSD readlink has no -f, so read the link target manually. Merely
        # re-deriving the directory of the link (without reading the link)
        # would leave NAME a symlink and loop forever.
        TARGET=$(readlink "$NAME") || break
        case "$TARGET" in
          /*) ;;                                      # absolute target
          *) TARGET="$(dirname "$NAME")/$TARGET" ;;   # relative to the link's directory
        esac
        # Canonicalize the directory part; keep the raw target if it is dangling
        DIR=$(cd "$(dirname "$TARGET")" 2>/dev/null && pwd -P) || { NAME="$TARGET"; break; }
        NAME="$DIR/$(basename "$TARGET")"
        ;;
      *)
        # For Linux and other systems, readlink -f resolves the full path
        NAME=$(readlink -f "$NAME")
        ;;
    esac
  done
  # Output the resolved path
  echo "$NAME"
}
20+
21+
# If CDT_TOOLS_HOME is not already set, resolve it based on the script's location
if [ -z "$CDT_TOOLS_HOME" ]; then
  # Resolve the absolute path of the current script
  SCRIPT=$(resolveLink "$0")
  # Set CDT_TOOLS_HOME to the parent directory of the script's directory
  CDT_TOOLS_HOME=$(cd "$(dirname "$SCRIPT")/.." && pwd)
  # Export CDT_TOOLS_HOME so it can be used in child processes
  export CDT_TOOLS_HOME
fi

# If JAVA is not set, locate the Java binary
if [ -z "$JAVA" ]; then
  if [ -z "$JAVA_HOME" ]; then
    # JAVA_HOME is not set: fall back to finding java in the system PATH.
    # `command -v` is the portable builtin; `which` is not guaranteed to exist.
    JAVA=$(command -v java)
  else
    # Use JAVA_HOME to find the Java binary
    JAVA="$JAVA_HOME/bin/java"
  fi
fi

# If JAVA is still not set, or does not name something that can be executed
# (e.g. a stale JAVA_HOME), print an error message and exit the script
if [ -z "$JAVA" ] || ! command -v "$JAVA" >/dev/null 2>&1; then
  echo "Cannot find a Java JDK." 1>&2
  echo "Please set JAVA or JAVA_HOME and ensure java (>=Java 17) is in your PATH." 1>&2
  exit 1 # Exit the script with an error code
fi

# Look for the directory that is expected to contain the JAR file
if [ -d "${CDT_TOOLS_HOME}/libs/" ]; then
  # If the libs directory exists, use it
  CDT_TOOLS_JAR_DIR=${CDT_TOOLS_HOME}/libs/
elif [ -d "${CDT_TOOLS_HOME}/target/" ]; then
  # Otherwise, if target/ exists, use that one
  CDT_TOOLS_JAR_DIR=${CDT_TOOLS_HOME}/target/
else
  # Otherwise, print an error message
  echo "Cannot find the directory ${CDT_TOOLS_HOME}/target/" 1>&2
  echo "Did you forget to compile the project?" 1>&2
  exit 2 # Exit the script with an error code
fi

# After determining the directory, look for the JAR file in that directory,
# skipping the -sources and -javadoc JARs. The glob is expanded with the
# directory quoted so that paths containing spaces work, and only the first
# match is taken so that CDT_TOOLS_CP always names exactly one file.
CDT_TOOLS_CP=
for CDT_TOOLS_JAR in "${CDT_TOOLS_JAR_DIR}"SPARQL-CDT-Tools-*.jar; do
  case "$CDT_TOOLS_JAR" in
    *-sources.jar|*-javadoc.jar) continue ;;
  esac
  # An unmatched glob stays literal; -f filters that case out
  if [ -f "$CDT_TOOLS_JAR" ]; then
    CDT_TOOLS_CP=$CDT_TOOLS_JAR
    break
  fi
done
unset CDT_TOOLS_JAR

# .. check that the JAR file is actually there
if [ -z "$CDT_TOOLS_CP" ]; then
  echo "Cannot find the JAR file in ${CDT_TOOLS_JAR_DIR}" 1>&2
  exit 3 # Exit the script with an error code
fi

bin/convert-rdf-lists

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
# Launcher for the de.olafhartig.ConvertRDFtoCDTList command-line tool.
# All command-line arguments are passed through to the Java program unchanged.
# Optional environment: JVM_ARGS - extra options for the JVM (space-separated).

# Directory that contains this script (and common.sh)
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

# common.sh provides JAVA and CDT_TOOLS_CP; stop if it cannot be loaded
source "${SCRIPT_DIR}/common.sh" || exit 1

# Run the Java command.
# JVM_ARGS is deliberately left unquoted so that it can carry several options;
# everything else is quoted so that paths and arguments with spaces survive.
"$JAVA" $JVM_ARGS -cp "$CDT_TOOLS_CP" de.olafhartig.ConvertRDFtoCDTList "$@"

pom.xml

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
2+
<modelVersion>4.0.0</modelVersion>
3+
<groupId>de.olafhartig</groupId>
4+
<artifactId>SPARQL-CDT-Tools</artifactId>
5+
<version>0.0.1-SNAPSHOT</version>
6+
7+
<properties>
8+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
9+
<maven.compiler.source>17</maven.compiler.source>
10+
<maven.compiler.target>17</maven.compiler.target>
11+
</properties>
12+
13+
<dependencies>
14+
<dependency>
15+
<groupId>org.apache.jena</groupId>
16+
<artifactId>apache-jena-libs</artifactId>
17+
<type>pom</type>
18+
<version>5.3.0</version>
19+
</dependency>
20+
<dependency>
21+
<groupId>org.apache.jena</groupId>
22+
<artifactId>jena-cmds</artifactId>
23+
<version>5.3.0</version>
24+
</dependency>
25+
<dependency>
26+
<groupId>junit</groupId>
27+
<artifactId>junit</artifactId>
28+
<version>4.13.2</version>
29+
<scope>test</scope>
30+
</dependency>
31+
</dependencies>
32+
33+
<build>
34+
<plugins>
35+
36+
<!--
37+
<plugin>
38+
<artifactId>maven-assembly-plugin</artifactId>
39+
<executions>
40+
<execution>
41+
<phase>package</phase>
42+
<goals>
43+
<goal>single</goal>
44+
</goals>
45+
</execution>
46+
</executions>
47+
<configuration>
48+
<descriptorRefs>
49+
<descriptorRef>jar-with-dependencies</descriptorRef>
50+
</descriptorRefs>
51+
</configuration>
52+
</plugin>
53+
-->
54+
55+
<!-- the following is copied from Jena Fuseki -->
56+
<plugin>
57+
<groupId>org.apache.maven.plugins</groupId>
58+
<artifactId>maven-shade-plugin</artifactId>
59+
<version>3.2.0</version>
60+
<configuration>
61+
<shadedArtifactAttached>false</shadedArtifactAttached>
62+
<transformers>
63+
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
64+
<!-- <mainClass>de.olafhartig.ConvertRDFtoCDTList</mainClass> -->
65+
</transformer>
66+
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
67+
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer" />
68+
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
69+
<addHeader>false</addHeader>
70+
</transformer>
71+
</transformers>
72+
<filters>
73+
<filter>
74+
<artifact>*:*</artifact>
75+
<excludes>
76+
<!-- Some jars are signed but shading breaks that.
77+
Don't include signing files.
78+
-->
79+
<exclude>META-INF/*.SF</exclude>
80+
<exclude>META-INF/*.DSA</exclude>
81+
<exclude>META-INF/*.RSA</exclude>
82+
</excludes>
83+
</filter>
84+
</filters>
85+
</configuration>
86+
<executions>
87+
<execution>
88+
<phase>package</phase>
89+
<goals>
90+
<goal>shade</goal>
91+
</goals>
92+
</execution>
93+
</executions>
94+
</plugin>
95+
<plugin>
96+
<groupId>org.apache.maven.plugins</groupId>
97+
<artifactId>maven-compiler-plugin</artifactId>
98+
<version>3.8.0</version>
99+
<configuration>
100+
<release>17</release>
101+
</configuration>
102+
</plugin>
103+
</plugins>
104+
</build>
105+
106+
</project>
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
package de.olafhartig;
2+
3+
import java.io.File;
4+
import java.io.FileInputStream;
5+
import java.io.FileNotFoundException;
6+
import java.io.FileOutputStream;
7+
import java.io.InputStream;
8+
import java.io.OutputStream;
9+
import java.net.URI;
10+
import java.util.HashSet;
11+
import java.util.Set;
12+
13+
import org.apache.jena.cmd.ArgDecl;
14+
import org.apache.jena.cmd.CmdException;
15+
import org.apache.jena.graph.Node;
16+
import org.apache.jena.graph.NodeFactory;
17+
import org.apache.jena.riot.Lang;
18+
import org.apache.jena.riot.RDFFormat;
19+
import org.apache.jena.riot.RDFLanguages;
20+
import org.apache.jena.riot.RDFParser;
21+
import org.apache.jena.riot.system.StreamRDF;
22+
import org.apache.jena.riot.system.StreamRDFWriter;
23+
24+
import arq.cmdline.CmdARQ;
25+
import arq.cmdline.ModLangOutput;
26+
import arq.cmdline.ModTime;
27+
import de.olafhartig.utils.RDFtoCDTListConverter;
28+
29+
public class ConvertRDFtoCDTList extends CmdARQ
30+
{
31+
protected final ModTime modTime = new ModTime();
32+
protected final ModLangOutput modLangOut = new ModLangOutput();
33+
34+
protected final ArgDecl argPredicate = new ArgDecl( ArgDecl.HasValue, "predicate" );
35+
protected final ArgDecl argInSyntax = new ArgDecl( ArgDecl.HasValue, "insyntax" );
36+
protected final ArgDecl argInFile = new ArgDecl( ArgDecl.HasValue, "infile" );
37+
protected final ArgDecl argOutFile = new ArgDecl( ArgDecl.HasValue, "outfile" );
38+
39+
public static void main( final String[] args ) {
40+
new ConvertRDFtoCDTList(args).mainRun();
41+
}
42+
43+
protected ConvertRDFtoCDTList( final String[] argv ) {
44+
super(argv);
45+
46+
addModule(modTime);
47+
48+
addModule(modLangOut);
49+
add( argOutFile, "--outfile", "File for the output data (if no such file is specified, then the tool writes to stdout)" );
50+
51+
getUsage().startCategory("Input control");
52+
add( argInSyntax, "--insyntax", "Specifies syntax of the input (if no syntax is specified, the syntax is guessed from the file extension of the input file)");
53+
add( argInFile, "--infile", "File with the input data (if no such file is specified, then the tool reads from stdin)" );
54+
55+
getUsage().startCategory("Conversion-related arguments");
56+
add( argPredicate, "--predicate", "Specifies the predicate IRI(s) to be considered for the conversion (this argument can be provided multiple times)");
57+
}
58+
59+
@Override
60+
protected String getSummary() {
61+
return "Usage: " + getCommandName() + " " +
62+
"--predicate=<predicate URI> " +
63+
"[ --insyntax=<syntax name> ] " +
64+
"[ --infile=<input file> ] " +
65+
"[ --outfile=<output file> ] ";
66+
}
67+
68+
@Override
69+
protected String getCommandName() {
70+
return "convert-rdf-lists";
71+
}
72+
73+
@Override
74+
protected void exec() {
75+
final InputStream inStream = determineInputStream();
76+
final Lang inSyntax = determineInputSyntax();
77+
78+
final RDFParser parser = RDFParser.create()
79+
.source(inStream)
80+
.lang(inSyntax)
81+
.build();
82+
83+
final Set<Node> predicates = determinePredicates();
84+
final StreamRDF outRDFStream = createOutputRDFStream();
85+
86+
final StreamRDF converter = new RDFtoCDTListConverter(predicates, outRDFStream);
87+
88+
if ( modTime.timingEnabled() ) {
89+
modTime.startTimer();
90+
}
91+
92+
parser.parse(converter);
93+
94+
if ( modTime.timingEnabled() ) {
95+
final long time = modTime.endTimer();
96+
System.out.println("Overall Processing Time: " + modTime.timeStr(time) + " sec");
97+
}
98+
}
99+
100+
101+
protected StreamRDF createOutputRDFStream() {
102+
if ( modLangOut.compressedOutput() ) {
103+
throw new CmdException("Compression of output not supported.");
104+
}
105+
106+
final RDFFormat format = modLangOut.getOutputStreamFormat();
107+
if ( format == null ) {
108+
throw new CmdException("Non-streaming output not supported.");
109+
}
110+
111+
final OutputStream outStream = determineOutputStream();
112+
final RDFFormat outSyntax = modLangOut.getOutputStreamFormat();
113+
114+
return StreamRDFWriter.getWriterStream(outStream, outSyntax);
115+
}
116+
117+
protected OutputStream determineOutputStream() {
118+
if ( ! contains(argOutFile) ) {
119+
return System.out;
120+
}
121+
122+
final String filename = getValue(argOutFile);
123+
final File file = new File(filename);
124+
125+
try {
126+
return new FileOutputStream(file);
127+
}
128+
catch ( final FileNotFoundException e ) {
129+
throw new CmdException( "There is a problem with the specified output file (" + filename + "): " + e.getMessage() );
130+
131+
}
132+
}
133+
134+
protected InputStream determineInputStream() {
135+
if ( ! contains(argInFile) ) {
136+
return System.in;
137+
}
138+
139+
final String filename = getValue(argInFile);
140+
final File file = new File(filename);
141+
142+
try {
143+
return new FileInputStream(file);
144+
}
145+
catch ( final FileNotFoundException e ) {
146+
throw new CmdException( "There is a problem with the specified input file (" + filename + "): " + e.getMessage() );
147+
}
148+
}
149+
150+
protected Lang determineInputSyntax() {
151+
if ( contains(argInSyntax) ) {
152+
final String syntax = getValue(argInSyntax);
153+
final Lang lang = RDFLanguages.nameToLang(syntax);
154+
if ( lang == null )
155+
throw new CmdException("Cannot detemine the syntax from '" + syntax + "'");
156+
return lang;
157+
}
158+
159+
if ( ! contains(argInFile) ) {
160+
throw new CmdException("Input syntax must be specified when reading from stdin.");
161+
}
162+
163+
final String filename = getValue(argInFile);
164+
final Lang lang = RDFLanguages.filenameToLang(filename);
165+
if ( lang == null ) {
166+
throw new CmdException("Cannot guess the input syntax for the given input file.");
167+
}
168+
169+
return lang;
170+
}
171+
172+
protected Set<Node> determinePredicates() {
173+
if ( ! contains(argPredicate) )
174+
throw new CmdException("No predicate URIs specified.");
175+
176+
//containsMultiple(argPredicate);
177+
final Set<Node> predicates = new HashSet<>();
178+
for ( final String p : getValues(argPredicate) ) {
179+
try {
180+
new URI(p);
181+
}
182+
catch ( final Exception e ) {
183+
throw new CmdException("One of the given predicate URIs does not seem to be a URI (" + p + ")");
184+
}
185+
186+
predicates.add( NodeFactory.createURI(p) );
187+
}
188+
189+
return predicates;
190+
}
191+
192+
}

0 commit comments

Comments
 (0)