package net.sansa_stack.examples.spark.inference.axioms

import net.sansa_stack.inference.rules.{ RDFSLevel, ReasoningProfile }
import net.sansa_stack.inference.rules.ReasoningProfile._
import net.sansa_stack.inference.spark.forwardchaining.axioms.{ ForwardRuleReasonerOWLHorst, ForwardRuleReasonerRDFS, TransitiveReasoner }
import net.sansa_stack.owl.spark.owl._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.semanticweb.owlapi.model.OWLAxiom

object RDFGraphInference {

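  // entry point: parse the CLI arguments and run the materialization with the parsed settings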
  def main(args: Array[String]): Unit = {
    parser.parse(args, Config()) match {
      case Some(config) =>
        run(config.in, config.profile, config.parallelism)
      case None =>
        println(parser.usage)
    }
  }

  def run(input: String, profile: ReasoningProfile, parallelism: Int): Unit = {

    // the Spark session config
    val spark = SparkSession.builder
      .appName(s"SPARK $profile Reasoning")
      .master("local[*]")
      .config("spark.hadoop.validateOutputSpecs", "false") // override output files
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.default.parallelism", parallelism)
      .config("spark.ui.showConsoleProgress", "false")
      .config("spark.sql.shuffle.partitions", parallelism)
      .getOrCreate()

    // load the axioms from disk (OWL Functional Syntax)
    val owlAxioms = spark.owl(Syntax.FUNCTIONAL)(input)
    println(s"|G| = ${owlAxioms.count()}")
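    // note: count() is a Spark action, so printing |G| forces the input file to actually be parsed here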

    // create reasoner and compute inferred graph
    val inferredGraph = profile match {
      case RDFS => new ForwardRuleReasonerRDFS(spark.sparkContext, parallelism)(owlAxioms)
      case OWL_HORST => new ForwardRuleReasonerOWLHorst(spark.sparkContext, parallelism)(owlAxioms)
      case _ =>
        throw new RuntimeException(s"Invalid profile: '$profile'")
    }

    println(s"|G_inf| = ${inferredGraph.count()}")

    spark.stop()
  }

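  // CLI defaults: empty input path, RDFS reasoning profile, parallelism of 4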
  case class Config(
    in: String = "",
    profile: ReasoningProfile = ReasoningProfile.RDFS,
    parallelism: Int = 4)

  // read ReasoningProfile enum
  implicit val profilesRead: scopt.Read[ReasoningProfile.Value] =
    scopt.Read.reads(ReasoningProfile forName _.toLowerCase())
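  // e.g. the CLI value "owl-horst" is lower-cased and resolved to the OWL_HORST profile via forName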

  // the CLI parser
  val parser = new scopt.OptionParser[Config]("RDFGraphMaterializer") {

    head("RDFGraphMaterializer (axioms)", "0.5.0")

    opt[String]('i', "input").required().valueName("<path>").
      action((x, c) => c.copy(in = x)).
      text("path to file or directory that contains the input files")

    opt[ReasoningProfile]('p', "profile").required().valueName("{rdfs | owl-horst}").
      action((x, c) => c.copy(profile = x)).
      text("the reasoning profile")

    opt[Int]("parallelism").optional().
      action((x, c) => c.copy(parallelism = x)).
      text("the degree of parallelism, i.e. the number of Spark partitions used in the Spark operations")

    help("help").text("prints this usage text")
  }
}
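
// Example invocation (a minimal sketch; the input path below is hypothetical and must point to an
// ontology in OWL Functional Syntax, the syntax hard-coded in run() above):
//
//   RDFGraphInference.main(Array(
//     "--input", "/data/university.owl",
//     "--profile", "rdfs",
//     "--parallelism", "4"))
//
// which is equivalent to calling RDFGraphInference.run("/data/university.owl", ReasoningProfile.RDFS, 4).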