Skip to content

Commit 66e8547

Browse files
authored
Implement TSV output mode. (#162)
* Implement TSV output mode. * More doc.
1 parent 69d3a8b commit 66e8547

File tree

5 files changed

+120
-16
lines changed

5 files changed

+120
-16
lines changed

build.sbt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ lazy val cli = project
6767
publish / skip := true,
6868
libraryDependencies ++= Seq(
6969
"com.outr" %% "scribe-slf4j" % "3.10.4",
70-
"com.github.alexarchambault" %% "case-app" % "2.0.6"
70+
"com.github.alexarchambault" %% "case-app" % "2.0.6",
71+
"io.circe" %% "circe-yaml" % "0.14.1",
7172
),
7273
gitCommitString := git.gitHeadCommit.value.getOrElse("Not Set"),
7374
buildInfoKeys := Seq[BuildInfoKey](name, version, scalaVersion, sbtVersion, gitCommitString),

cli/src/main/scala/org/renci/relationgraph/Config.scala

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@ package org.renci.relationgraph
33
import caseapp._
44
import caseapp.core.Error.MalformedValue
55
import caseapp.core.argparser.{ArgParser, SimpleArgParser}
6-
import org.renci.relationgraph.Config.{BoolValue, FalseValue, TrueValue}
7-
import org.renci.relationgraph.RelationGraph.Config.{OWLMode, OutputMode, RDFMode}
6+
import org.renci.relationgraph.Config.{BoolValue, FalseValue, OutputMode, RDFMode, TrueValue}
87

98
@AppName("relation-graph")
109
@ProgName("relation-graph")
@@ -15,8 +14,8 @@ final case class Config(
1514
@HelpMessage("File to stream output triples to.")
1615
@ValueDescription("filename")
1716
outputFile: String,
18-
@HelpMessage("Configure style of triples to be output. RDF mode is the default; each existential relation is collapsed to a single direct triple.")
19-
@ValueDescription("RDF|OWL")
17+
@HelpMessage("Configure style of triples to be output. RDF mode is the default; each existential relation is collapsed to a single direct triple. TSV mode outputs the same triples as RDF mode, but as TSV, compacting IRIs using an optional prefixes file.")
18+
@ValueDescription("RDF|OWL|TSV")
2019
mode: OutputMode = RDFMode,
2120
@HelpMessage("Property to restrict output relations to. Provide option multiple times for multiple properties. If no properties are provided (via CLI or file), then all properties found in the ontology will be used.")
2221
@ValueDescription("IRI")
@@ -42,13 +41,23 @@ final case class Config(
4241
@HelpMessage("Disable inference of unsatisfiable classes by the whelk reasoner (default false)")
4342
@ValueDescription("bool")
4443
disableOwlNothing: BoolValue = FalseValue,
44+
@HelpMessage("Prefix mappings to use for TSV output (YAML dictionary")
45+
@ValueDescription("filename")
46+
prefixes: Option[String],
47+
@HelpMessage("Compact OBO-style IRIs regardless of inclusion in prefixes file")
48+
@ValueDescription("bool")
49+
oboPrefixes: BoolValue = TrueValue,
4550
@HelpMessage("Set log level to INFO")
4651
@ValueDescription("bool")
4752
verbose: Boolean = false) {
4853

4954
def toRelationGraphConfig: RelationGraph.Config =
5055
RelationGraph.Config(
51-
mode = this.mode,
56+
mode = this.mode match {
57+
case Config.RDFMode => RelationGraph.Config.RDFMode
58+
case Config.OWLMode => RelationGraph.Config.OWLMode
59+
case Config.TSVMode => RelationGraph.Config.RDFMode
60+
},
5261
outputSubclasses = this.outputSubclasses.bool,
5362
reflexiveSubclasses = this.reflexiveSubclasses.bool,
5463
equivalenceAsSubclass = this.equivalenceAsSubclass.bool,
@@ -61,10 +70,19 @@ final case class Config(
6170

6271
object Config {
6372

73+
/** Output styles selectable on the command line; parsed from the strings "rdf", "owl" and "tsv". */
sealed trait OutputMode
74+
75+
/** RDF output; this is the default value of the `mode` option. */
case object RDFMode extends OutputMode
76+
77+
/** OWL output. */
case object OWLMode extends OutputMode
78+
79+
/** Tab-separated output; mapped to the core RDF triples mode when building the RelationGraph config. */
case object TSVMode extends OutputMode
80+
6481
/**
 * Command-line parser for [[OutputMode]] values.
 *
 * The argument is lower-cased before lookup, so matching is case-insensitive. Any value
 * other than "rdf", "owl" or "tsv" is rejected as a malformed "output mode".
 */
implicit val rdfModeParser: ArgParser[OutputMode] = {
  // Recognised spellings (already lower-cased) and the mode each one selects.
  val modesByName: Map[String, OutputMode] = Map(
    "rdf" -> RDFMode,
    "owl" -> OWLMode,
    "tsv" -> TSVMode
  )
  SimpleArgParser.from[OutputMode]("output mode") { arg =>
    modesByName.get(arg.toLowerCase).toRight(MalformedValue("output mode", arg))
  }
}

cli/src/main/scala/org/renci/relationgraph/Main.scala

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@ import org.semanticweb.owlapi.model._
1010
import scribe.Level
1111
import scribe.filter.{packageName, select}
1212
import zio._
13-
import Config._
13+
import io.circe.yaml.parser
14+
import org.renci.relationgraph.Config.{OWLMode, RDFMode, TSVMode}
1415

15-
import java.io.{File, FileOutputStream}
16+
import java.io.{File, FileOutputStream, FileReader}
1617
import scala.io.Source
1718

1819
object Main extends ZCaseApp[Config] {
@@ -27,7 +28,7 @@ object Main extends ZCaseApp[Config] {
2728
.replace()
2829
}
2930
val program = ZIO.scoped {
30-
createStreamRDF(config.outputFile).flatMap { rdfWriter =>
31+
createStream(config).flatMap { rdfWriter =>
3132
for {
3233
fileProperties <- config.propertiesFile.map(readPropertiesFile).getOrElse(ZIO.succeed(Set.empty[AtomicConcept]))
3334
specifiedProperties = fileProperties ++ config.property.map(prop => AtomicConcept(prop)).to(Set)
@@ -47,6 +48,15 @@ object Main extends ZCaseApp[Config] {
4748
}.exitCode
4849
}
4950

51+
/**
 * Opens the scoped output sink matching `config.mode`.
 *
 * RDF and OWL modes both write through `createStreamRDF`. TSV mode first loads the optional
 * prefixes file (an absent file means no extra prefixes) and then opens a TSV sink.
 *
 * @param config parsed command-line configuration
 * @return a `StreamRDF` created by `createStreamRDF` or `createStreamTSV`
 */
def createStream(config: Config): ZIO[Scope, Throwable, StreamRDF] =
  config.mode match {
    case RDFMode | OWLMode => createStreamRDF(config.outputFile)
    case TSVMode =>
      for {
        loadedPrefixes <- ZIO.foreach(config.prefixes)(readPrefixesFile)
        sink <- createStreamTSV(config.outputFile, loadedPrefixes.getOrElse(Map.empty), config.oboPrefixes.bool)
      } yield sink
  }
59+
5060
def createStreamRDF(path: String): ZIO[Scope, Throwable, StreamRDF] = {
5161
ZIO.acquireRelease(ZIO.attempt(new FileOutputStream(new File(path))))(stream => ZIO.succeed(stream.close())).flatMap { outputStream =>
5262
ZIO.acquireRelease(ZIO.attempt {
@@ -57,6 +67,16 @@ object Main extends ZCaseApp[Config] {
5767
}
5868
}
5969

70+
/**
 * Creates a scoped TSV sink writing to `path`.
 *
 * The sink is started as part of acquisition and `finish()` is registered as its release action.
 *
 * @param path        output file path
 * @param prefixes    prefix mappings handed to the `TSVStreamRDF`
 * @param oboPrefixes passed through to the `TSVStreamRDF` constructor
 */
def createStreamTSV(path: String, prefixes: Map[String, String], oboPrefixes: Boolean): ZIO[Scope, Throwable, StreamRDF] =
  for {
    target <- ZIO.attempt(new File(path))
    sink <- ZIO.acquireRelease(ZIO.attempt {
      val tsv = new TSVStreamRDF(target, prefixes, oboPrefixes)
      tsv.start()
      tsv
    })(tsv => ZIO.succeed(tsv.finish()))
  } yield sink
79+
6080
def loadOntology(path: String): Task[OWLOntology] = for {
6181
manager <- ZIO.attempt(OWLManager.createOWLOntologyManager())
6282
ontology <- ZIO.attemptBlocking(manager.loadOntologyFromOntologyDocument(new File(path)))
@@ -67,4 +87,13 @@ object Main extends ZCaseApp[Config] {
6787
ZIO.attemptBlocking(source.getLines().map(_.trim).filter(_.nonEmpty).map(line => AtomicConcept(line)).to(Set))
6888
}
6989

90+
/**
 * Reads a YAML dictionary of prefix -> namespace mappings from `filename`.
 *
 * The file is decoded explicitly as UTF-8 instead of the platform default charset, so the
 * result does not depend on the machine's locale settings. Opening, parsing and closing the
 * file all run as blocking effects.
 *
 * @param filename path of the YAML prefixes file
 * @return the decoded mappings; fails if the file cannot be opened or read, is not valid
 *         YAML, or does not decode as a string-to-string dictionary
 */
def readPrefixesFile(filename: String): ZIO[Any, Throwable, Map[String, String]] = {
  import java.io.{FileInputStream, InputStreamReader}
  import java.nio.charset.StandardCharsets

  ZIO.attemptBlocking(new InputStreamReader(new FileInputStream(new File(filename)), StandardCharsets.UTF_8))
    .acquireReleaseWithAuto { reader =>
      // Parsing pulls from the reader, so it is blocking I/O; attemptBlocking also turns any
      // exception thrown while reading into a regular failure rather than a defect.
      ZIO.attemptBlocking(parser.parse(reader).flatMap(_.as[Map[String, String]]))
        .flatMap(decoded => ZIO.fromEither(decoded))
    }
}
98+
7099
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package org.renci.relationgraph
2+
3+
import org.apache.jena.graph.Triple
4+
import org.apache.jena.riot.system.StreamRDF
5+
import org.apache.jena.shared.PrefixMapping
6+
import org.apache.jena.shared.impl.PrefixMappingImpl
7+
import org.apache.jena.sparql.core.Quad
8+
9+
import java.io.{File, PrintWriter}
10+
import scala.jdk.CollectionConverters._
11+
12+
/**
 * A Jena `StreamRDF` sink that writes each received triple or quad to `file` as one
 * tab-separated line, rendering every node through a prefix mapping.
 *
 * @param file        destination file; opened as UTF-8 by `start()` and closed by `finish()`
 * @param prefixes    prefix -> namespace IRI mappings used to shorten IRIs
 * @param oboPrefixes when true, IRIs under `http://purl.obolibrary.org/obo/` that no declared
 *                    prefix shortens are rewritten from `.../obo/PREFIX_LOCAL` to `PREFIX:LOCAL`
 */
class TSVStreamRDF(file: File, prefixes: Map[String, String], oboPrefixes: Boolean) extends StreamRDF {

  private val OboNamespace = "http://purl.obolibrary.org/obo/"

  private val prefixMapping: PrefixMapping = {
    val pm = new PrefixMappingImpl() {
      override def shortForm(uri: String): String = {
        val declared = super.shortForm(uri)
        // Fall back to OBO compaction only when no declared prefix changed the IRI.
        if (oboPrefixes && declared == uri && uri.startsWith(OboNamespace)) {
          // stripPrefix removes only the leading namespace, never a later occurrence.
          uri.stripPrefix(OboNamespace).split("_", 2) match {
            case Array(idSpace, localId) if idSpace.nonEmpty => s"$idSpace:$localId"
            // No "PREFIX_" part (e.g. ".../obo/go.owl"): keep the full IRI instead of
            // emitting a bare, ambiguous local name.
            case _ => uri
          }
        } else declared
      }
    }
    pm.setNsPrefixes(prefixes.asJava).withDefaultMappings(PrefixMapping.Standard)
  }

  // None until start() has been called.
  private var writer: Option[PrintWriter] = None

  /** Returns the open writer, or fails with a clear message if `start()` was never called. */
  private def activeWriter: PrintWriter =
    writer.getOrElse(throw new IllegalStateException("TSVStreamRDF.start() must be called before writing"))

  /** Opens the output file; a writer left over from an earlier `start()` is closed first. */
  override def start(): Unit = {
    writer.foreach(_.close())
    writer = Some(new PrintWriter(file, "utf-8"))
  }

  /** Writes subject, predicate and object as one tab-separated line. */
  override def triple(triple: Triple): Unit = {
    val columns = Seq(triple.getSubject, triple.getPredicate, triple.getObject)
      .map(_.toString(prefixMapping, true))
    activeWriter.println(columns.mkString("\t"))
  }

  /** Writes subject, predicate, object and graph as one tab-separated line. */
  override def quad(quad: Quad): Unit = {
    val columns = Seq(quad.getSubject, quad.getPredicate, quad.getObject, quad.getGraph)
      .map(_.toString(prefixMapping, true))
    activeWriter.println(columns.mkString("\t"))
  }

  override def base(base: String): Unit = ()

  override def prefix(prefix: String, iri: String): Unit = ()

  /**
   * Closes the output file. Safe to call when `start()` was never called.
   *
   * NOTE(review): `PrintWriter` records I/O failures in `checkError()` rather than throwing,
   * so write errors are not surfaced here — confirm whether callers need them reported.
   */
  override def finish(): Unit = {
    writer.foreach(_.close())
  }

}

core/src/main/scala/org/renci/relationgraph/RelationGraph.scala

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import org.apache.jena.sys.JenaSystem
66
import org.apache.jena.vocabulary.{OWL2, RDF, RDFS}
77
import org.geneontology.whelk.BuiltIn.{Bottom, Top}
88
import org.geneontology.whelk._
9-
import org.renci.relationgraph.RelationGraph.Config.{OWLMode, OutputMode, RDFMode}
9+
import org.renci.relationgraph.RelationGraph.Config.{OWLMode, RDFMode, TriplesMode}
1010
import org.semanticweb.owlapi.apibinding.OWLFunctionalSyntaxFactory.{OWLNothing, OWLThing}
1111
import org.semanticweb.owlapi.model.parameters.Imports
1212
import org.semanticweb.owlapi.model._
@@ -32,7 +32,7 @@ object RelationGraph extends StrictLogging {
3232
private val OWLOntology = OWL2.Ontology.asNode
3333

3434
final case class Config(
35-
mode: OutputMode = RDFMode,
35+
mode: TriplesMode = RDFMode,
3636
outputSubclasses: Boolean = false,
3737
reflexiveSubclasses: Boolean = true,
3838
equivalenceAsSubclass: Boolean = true,
@@ -43,11 +43,11 @@ object RelationGraph extends StrictLogging {
4343

4444
object Config {
4545

46-
sealed trait OutputMode
46+
sealed trait TriplesMode
4747

48-
case object RDFMode extends OutputMode
48+
case object RDFMode extends TriplesMode
4949

50-
case object OWLMode extends OutputMode
50+
case object OWLMode extends TriplesMode
5151

5252
}
5353

@@ -101,7 +101,7 @@ object RelationGraph extends StrictLogging {
101101
} yield ()
102102
}
103103

104-
def processRestrictionAndExtendQueue(restriction: Restriction, properties: Hierarchy, classes: Hierarchy, whelk: IndexedReasonerState, mode: Config.OutputMode, descendProperties: Boolean, outputClasses: Boolean, outputIndividuals: Boolean, queue: Queue[Restriction], activeRestrictions: Ref[Int], seenRefs: Map[Role, Ref[Set[AtomicConcept]]]): UIO[TriplesGroup] = {
104+
def processRestrictionAndExtendQueue(restriction: Restriction, properties: Hierarchy, classes: Hierarchy, whelk: IndexedReasonerState, mode: Config.TriplesMode, descendProperties: Boolean, outputClasses: Boolean, outputIndividuals: Boolean, queue: Queue[Restriction], activeRestrictions: Ref[Int], seenRefs: Map[Role, Ref[Set[AtomicConcept]]]): UIO[TriplesGroup] = {
105105
val triples = processRestriction(restriction, whelk, mode, outputClasses, outputIndividuals)
106106
val continue = triples.redundant.nonEmpty
107107
for {
@@ -134,7 +134,7 @@ object RelationGraph extends StrictLogging {
134134
} yield triples
135135
}
136136

137-
def processRestriction(restriction: Restriction, whelk: IndexedReasonerState, mode: Config.OutputMode, outputClasses: Boolean, outputIndividuals: Boolean): TriplesGroup = {
137+
def processRestriction(restriction: Restriction, whelk: IndexedReasonerState, mode: Config.TriplesMode, outputClasses: Boolean, outputIndividuals: Boolean): TriplesGroup = {
138138
val subConcepts = queryExistentialSubclasses(restriction, whelk)
139139
val subclasses = if (outputClasses) (subConcepts - Bottom).collect { case AtomicConcept(id) => id } else Set.empty[String]
140140
val instances = if (outputIndividuals) subConcepts.collect { case Nominal(Individual(id)) => id } else Set.empty[String]

0 commit comments

Comments
 (0)