|
| 1 | +package de.olafhartig; |
| 2 | + |
| 3 | +import java.io.File; |
| 4 | +import java.io.FileInputStream; |
| 5 | +import java.io.FileNotFoundException; |
| 6 | +import java.io.FileOutputStream; |
| 7 | +import java.io.InputStream; |
| 8 | +import java.io.OutputStream; |
| 9 | +import java.net.URI; |
| 10 | +import java.util.HashSet; |
| 11 | +import java.util.Set; |
| 12 | + |
| 13 | +import org.apache.jena.cmd.ArgDecl; |
| 14 | +import org.apache.jena.cmd.CmdException; |
| 15 | +import org.apache.jena.graph.Node; |
| 16 | +import org.apache.jena.graph.NodeFactory; |
| 17 | +import org.apache.jena.riot.Lang; |
| 18 | +import org.apache.jena.riot.RDFFormat; |
| 19 | +import org.apache.jena.riot.RDFLanguages; |
| 20 | +import org.apache.jena.riot.RDFParser; |
| 21 | +import org.apache.jena.riot.system.StreamRDF; |
| 22 | +import org.apache.jena.riot.system.StreamRDFWriter; |
| 23 | + |
| 24 | +import arq.cmdline.CmdARQ; |
| 25 | +import arq.cmdline.ModLangOutput; |
| 26 | +import arq.cmdline.ModTime; |
| 27 | +import de.olafhartig.utils.RDFtoCDTListConverter; |
| 28 | + |
| 29 | +public class ConvertRDFtoCDTList extends CmdARQ |
| 30 | +{ |
| 31 | + protected final ModTime modTime = new ModTime(); |
| 32 | + protected final ModLangOutput modLangOut = new ModLangOutput(); |
| 33 | + |
| 34 | + protected final ArgDecl argPredicate = new ArgDecl( ArgDecl.HasValue, "predicate" ); |
| 35 | + protected final ArgDecl argInSyntax = new ArgDecl( ArgDecl.HasValue, "insyntax" ); |
| 36 | + protected final ArgDecl argInFile = new ArgDecl( ArgDecl.HasValue, "infile" ); |
| 37 | + protected final ArgDecl argOutFile = new ArgDecl( ArgDecl.HasValue, "outfile" ); |
| 38 | + |
| 39 | + public static void main( final String[] args ) { |
| 40 | + new ConvertRDFtoCDTList(args).mainRun(); |
| 41 | + } |
| 42 | + |
| 43 | + protected ConvertRDFtoCDTList( final String[] argv ) { |
| 44 | + super(argv); |
| 45 | + |
| 46 | + addModule(modTime); |
| 47 | + |
| 48 | + addModule(modLangOut); |
| 49 | + add( argOutFile, "--outfile", "File for the output data (if no such file is specified, then the tool writes to stdout)" ); |
| 50 | + |
| 51 | + getUsage().startCategory("Input control"); |
| 52 | + add( argInSyntax, "--insyntax", "Specifies syntax of the input (if no syntax is specified, the syntax is guessed from the file extension of the input file)"); |
| 53 | + add( argInFile, "--infile", "File with the input data (if no such file is specified, then the tool reads from stdin)" ); |
| 54 | + |
| 55 | + getUsage().startCategory("Conversion-related arguments"); |
| 56 | + add( argPredicate, "--predicate", "Specifies the predicate IRI(s) to be considered for the conversion (this argument can be provided multiple times)"); |
| 57 | + } |
| 58 | + |
| 59 | + @Override |
| 60 | + protected String getSummary() { |
| 61 | + return "Usage: " + getCommandName() + " " + |
| 62 | + "--predicate=<predicate URI> " + |
| 63 | + "[ --insyntax=<syntax name> ] " + |
| 64 | + "[ --infile=<input file> ] " + |
| 65 | + "[ --outfile=<output file> ] "; |
| 66 | + } |
| 67 | + |
| 68 | + @Override |
| 69 | + protected String getCommandName() { |
| 70 | + return "convert-rdf-lists"; |
| 71 | + } |
| 72 | + |
| 73 | + @Override |
| 74 | + protected void exec() { |
| 75 | + final InputStream inStream = determineInputStream(); |
| 76 | + final Lang inSyntax = determineInputSyntax(); |
| 77 | + |
| 78 | + final RDFParser parser = RDFParser.create() |
| 79 | + .source(inStream) |
| 80 | + .lang(inSyntax) |
| 81 | + .build(); |
| 82 | + |
| 83 | + final Set<Node> predicates = determinePredicates(); |
| 84 | + final StreamRDF outRDFStream = createOutputRDFStream(); |
| 85 | + |
| 86 | + final StreamRDF converter = new RDFtoCDTListConverter(predicates, outRDFStream); |
| 87 | + |
| 88 | + if ( modTime.timingEnabled() ) { |
| 89 | + modTime.startTimer(); |
| 90 | + } |
| 91 | + |
| 92 | + parser.parse(converter); |
| 93 | + |
| 94 | + if ( modTime.timingEnabled() ) { |
| 95 | + final long time = modTime.endTimer(); |
| 96 | + System.out.println("Overall Processing Time: " + modTime.timeStr(time) + " sec"); |
| 97 | + } |
| 98 | + } |
| 99 | + |
| 100 | + |
| 101 | + protected StreamRDF createOutputRDFStream() { |
| 102 | + if ( modLangOut.compressedOutput() ) { |
| 103 | + throw new CmdException("Compression of output not supported."); |
| 104 | + } |
| 105 | + |
| 106 | + final RDFFormat format = modLangOut.getOutputStreamFormat(); |
| 107 | + if ( format == null ) { |
| 108 | + throw new CmdException("Non-streaming output not supported."); |
| 109 | + } |
| 110 | + |
| 111 | + final OutputStream outStream = determineOutputStream(); |
| 112 | + final RDFFormat outSyntax = modLangOut.getOutputStreamFormat(); |
| 113 | + |
| 114 | + return StreamRDFWriter.getWriterStream(outStream, outSyntax); |
| 115 | + } |
| 116 | + |
| 117 | + protected OutputStream determineOutputStream() { |
| 118 | + if ( ! contains(argOutFile) ) { |
| 119 | + return System.out; |
| 120 | + } |
| 121 | + |
| 122 | + final String filename = getValue(argOutFile); |
| 123 | + final File file = new File(filename); |
| 124 | + |
| 125 | + try { |
| 126 | + return new FileOutputStream(file); |
| 127 | + } |
| 128 | + catch ( final FileNotFoundException e ) { |
| 129 | + throw new CmdException( "There is a problem with the specified output file (" + filename + "): " + e.getMessage() ); |
| 130 | + |
| 131 | + } |
| 132 | + } |
| 133 | + |
| 134 | + protected InputStream determineInputStream() { |
| 135 | + if ( ! contains(argInFile) ) { |
| 136 | + return System.in; |
| 137 | + } |
| 138 | + |
| 139 | + final String filename = getValue(argInFile); |
| 140 | + final File file = new File(filename); |
| 141 | + |
| 142 | + try { |
| 143 | + return new FileInputStream(file); |
| 144 | + } |
| 145 | + catch ( final FileNotFoundException e ) { |
| 146 | + throw new CmdException( "There is a problem with the specified input file (" + filename + "): " + e.getMessage() ); |
| 147 | + } |
| 148 | + } |
| 149 | + |
| 150 | + protected Lang determineInputSyntax() { |
| 151 | + if ( contains(argInSyntax) ) { |
| 152 | + final String syntax = getValue(argInSyntax); |
| 153 | + final Lang lang = RDFLanguages.nameToLang(syntax); |
| 154 | + if ( lang == null ) |
| 155 | + throw new CmdException("Cannot detemine the syntax from '" + syntax + "'"); |
| 156 | + return lang; |
| 157 | + } |
| 158 | + |
| 159 | + if ( ! contains(argInFile) ) { |
| 160 | + throw new CmdException("Input syntax must be specified when reading from stdin."); |
| 161 | + } |
| 162 | + |
| 163 | + final String filename = getValue(argInFile); |
| 164 | + final Lang lang = RDFLanguages.filenameToLang(filename); |
| 165 | + if ( lang == null ) { |
| 166 | + throw new CmdException("Cannot guess the input syntax for the given input file."); |
| 167 | + } |
| 168 | + |
| 169 | + return lang; |
| 170 | + } |
| 171 | + |
| 172 | + protected Set<Node> determinePredicates() { |
| 173 | + if ( ! contains(argPredicate) ) |
| 174 | + throw new CmdException("No predicate URIs specified."); |
| 175 | + |
| 176 | + //containsMultiple(argPredicate); |
| 177 | + final Set<Node> predicates = new HashSet<>(); |
| 178 | + for ( final String p : getValues(argPredicate) ) { |
| 179 | + try { |
| 180 | + new URI(p); |
| 181 | + } |
| 182 | + catch ( final Exception e ) { |
| 183 | + throw new CmdException("One of the given predicate URIs does not seem to be a URI (" + p + ")"); |
| 184 | + } |
| 185 | + |
| 186 | + predicates.add( NodeFactory.createURI(p) ); |
| 187 | + } |
| 188 | + |
| 189 | + return predicates; |
| 190 | + } |
| 191 | + |
| 192 | +} |
0 commit comments