This repository was archived by the owner on Oct 8, 2020. It is now read-only.

Commit 2d45edf

Make Spark module Scala-style compliant
1 parent 7b28bc8 commit 2d45edf
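
The changes below are mechanical style fixes: import statements are regrouped and sorted, "//" comments gain a space after the slashes, and a number of lines change only in whitespace or indentation. The Scalastyle configuration driving this is not part of the commit, so the snippet below is only a sketch of the import layout the touched files converge on (taken from BorderFlowClustering.scala after the change): JDK/Scala standard-library imports first, then all remaining imports in one alphabetically sorted block.

package net.sansa_stack.examples.spark.ml.clustering

import scala.collection.mutable                // scala/java standard library group first

import net.sansa_stack.ml.spark.clustering.{ BorderFlow, FirstHardeninginBorderFlow }
import net.sansa_stack.rdf.spark.io._          // all other imports, sorted alphabetically
import net.sansa_stack.rdf.spark.model.graph._
import org.apache.jena.riot.Lang
import org.apache.log4j.{ Level, Logger }
import org.apache.spark.sql.SparkSession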

File tree: 21 files changed, +160 / -158 lines


sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/inference/RDFGraphInference.scala

Lines changed: 13 additions & 14 deletions
@@ -2,16 +2,15 @@ package net.sansa_stack.examples.spark.inference
 
 import java.net.URI
 
-import org.apache.jena.graph.{ Node, NodeFactory }
-import org.apache.spark.SparkConf
-import org.apache.spark.sql.SparkSession
-
 import net.sansa_stack.inference.data.RDFTriple
-import net.sansa_stack.inference.rules.ReasoningProfile._
 import net.sansa_stack.inference.rules.{ RDFSLevel, ReasoningProfile }
+import net.sansa_stack.inference.rules.ReasoningProfile._
 import net.sansa_stack.inference.spark.data.loader.RDFGraphLoader
 import net.sansa_stack.inference.spark.data.writer.RDFGraphWriter
 import net.sansa_stack.inference.spark.forwardchaining.triples.{ForwardRuleReasonerOWLHorst, ForwardRuleReasonerRDFS, TransitiveReasoner}
+import org.apache.jena.graph.{ Node, NodeFactory }
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
 
 object RDFGraphInference {
 
@@ -45,7 +44,7 @@ object RDFGraphInference {
     // create reasoner
     val reasoner = profile match {
       case TRANSITIVE => new TransitiveReasoner(spark.sparkContext, properties, parallelism)
-      case RDFS => new ForwardRuleReasonerRDFS(spark.sparkContext, parallelism)
+      case RDFS => new ForwardRuleReasonerRDFS(spark.sparkContext, parallelism)
       case RDFS_SIMPLE =>
         val r = new ForwardRuleReasonerRDFS(spark.sparkContext, parallelism)
         r.level = RDFSLevel.SIMPLE
@@ -64,13 +63,13 @@ object RDFGraphInference {
   }
 
   case class Config(
-    in: Seq[URI] = Seq(),
-    out: URI = new URI("."),
-    properties: Seq[Node] = Seq(),
-    profile: ReasoningProfile = ReasoningProfile.RDFS,
-    writeToSingleFile: Boolean = false,
-    sortedOutput: Boolean = false,
-    parallelism: Int = 4)
+    in: Seq[URI] = Seq(),
+    out: URI = new URI("."),
+    properties: Seq[Node] = Seq(),
+    profile: ReasoningProfile = ReasoningProfile.RDFS,
+    writeToSingleFile: Boolean = false,
+    sortedOutput: Boolean = false,
+    parallelism: Int = 4)
 
   // read ReasoningProfile enum
   implicit val profilesRead: scopt.Read[ReasoningProfile.Value] =
@@ -118,4 +117,4 @@ object RDFGraphInference {
       if (c.profile == TRANSITIVE && c.properties.isEmpty) failure("Option --properties must not be empty if profile 'transitive' is set")
       else success)
   }
-}
+}

sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/BorderFlowClustering.scala

Lines changed: 6 additions & 4 deletions
@@ -1,12 +1,14 @@
 package net.sansa_stack.examples.spark.ml.clustering
 
 import scala.collection.mutable
-import org.apache.spark.sql.SparkSession
-import org.apache.log4j.{ Level, Logger }
+
 import net.sansa_stack.ml.spark.clustering.{ BorderFlow, FirstHardeninginBorderFlow }
-import org.apache.jena.riot.Lang
 import net.sansa_stack.rdf.spark.io._
 import net.sansa_stack.rdf.spark.model.graph._
+import org.apache.jena.riot.Lang
+import org.apache.log4j.{ Level, Logger }
+import org.apache.spark.sql.SparkSession
+
 
 object BorderFlowClustering {
 
@@ -37,7 +39,7 @@ object BorderFlowClustering {
     val graph = triples.asStringGraph()
 
     val borderflow = algName match {
-      case "borderflow" => BorderFlow(spark, graph, output, outputevlsoft, outputevlhard)
+      case "borderflow" => BorderFlow(spark, graph, output, outputevlsoft, outputevlhard)
       case "firsthardening" => FirstHardeninginBorderFlow(spark, graph, output, outputevlhard)
       case _ =>
         throw new RuntimeException("'" + algName + "' - Not supported, yet.")

sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/RDFByModularityClustering.scala

Lines changed: 5 additions & 3 deletions
@@ -1,9 +1,11 @@
 package net.sansa_stack.examples.spark.ml.clustering
 
 import scala.collection.mutable
-import org.apache.spark.sql.SparkSession
-import org.apache.log4j.{ Level, Logger }
+
 import net.sansa_stack.ml.spark.clustering.{ RDFByModularityClustering => RDFByModularityClusteringAlg }
+import org.apache.log4j.{ Level, Logger }
+import org.apache.spark.sql.SparkSession
+
 
 object RDFByModularityClustering {
 
@@ -58,4 +60,4 @@ object RDFByModularityClustering {
 
     help("help").text("prints this usage text")
   }
-}
+}

sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/RDFGraphPIClustering.scala

Lines changed: 5 additions & 7 deletions
@@ -1,15 +1,13 @@
 package net.sansa_stack.examples.spark.ml.clustering
 
 import scala.collection.mutable
-import org.apache.spark.sql.SparkSession
-import org.apache.log4j.{ Level, Logger }
-import org.apache.jena.riot.{ Lang, RDFDataMgr }
-import java.io.ByteArrayInputStream
-import org.apache.jena.riot.Lang
+
+import net.sansa_stack.ml.spark.clustering.RDFGraphPowerIterationClustering
 import net.sansa_stack.rdf.spark.io._
 import net.sansa_stack.rdf.spark.model.graph._
-import net.sansa_stack.rdf._
-import net.sansa_stack.ml.spark.clustering.RDFGraphPowerIterationClustering
+import org.apache.jena.riot.{ Lang, RDFDataMgr }
+import org.apache.log4j.{ Level, Logger }
+import org.apache.spark.sql.SparkSession
 
 object RDFGraphPIClustering {
 
sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/clustering/SilviaClustering.scala

Lines changed: 6 additions & 4 deletions
@@ -1,13 +1,15 @@
 package net.sansa_stack.examples.spark.ml.clustering
 
 import scala.collection.mutable
-import org.apache.spark.sql.SparkSession
-import org.apache.log4j.{ Level, Logger }
-import java.net.{ URI => JavaURI }
+
 import net.sansa_stack.ml.spark.clustering.{ SilviaClustering => AlgSilviaClustering }
-import org.apache.jena.riot.Lang
 import net.sansa_stack.rdf.spark.io._
 import net.sansa_stack.rdf.spark.model.graph._
+import org.apache.jena.riot.Lang
+import org.apache.log4j.{ Level, Logger }
+import org.apache.spark.sql.SparkSession
+
+
 
 object SilviaClustering {
 
sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/kernel/RDFGraphKernel.scala

Lines changed: 2 additions & 3 deletions
@@ -50,8 +50,8 @@ object RDFGraphKernel {
   }
 
   case class Config(
-    in: String = "",
-    iteration: Int = 5)
+    in: String = "",
+    iteration: Int = 5)
 
   val parser = new scopt.OptionParser[Config]("Mines the Rules example") {
 
@@ -68,4 +68,3 @@ object RDFGraphKernel {
     help("help").text("prints this usage text")
   }
 }
-
sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/kge/CrossValidation.scala

Lines changed: 7 additions & 8 deletions
@@ -1,10 +1,9 @@
 package net.sansa_stack.examples.spark.ml.kge
 
-import org.apache.spark.sql._
-import net.sansa_stack.rdf.spark.kge.triples.Triples
+import net.sansa_stack.ml.spark.kge.linkprediction.crossvalidation.{ kFold, Bootstrapping, Holdout }
 import net.sansa_stack.rdf.spark.kge.convertor.ByIndex
-import net.sansa_stack.ml.spark.kge.linkprediction.crossvalidation.{ Bootstrapping, Holdout }
-import net.sansa_stack.ml.spark.kge.linkprediction.crossvalidation.kFold
+import net.sansa_stack.rdf.spark.kge.triples.Triples
+import org.apache.spark.sql._
 
 object CrossValidation {
 
@@ -45,9 +44,9 @@ object CrossValidation {
     indexedData.numeric.take(10).foreach(println)
 
     val (train, test) = technique match {
-      case "holdout" => new Holdout(numericData, 0.6f).crossValidation()
+      case "holdout" => new Holdout(numericData, 0.6f).crossValidation()
       case "bootstrapping" => new Bootstrapping(numericData).crossValidation()
-      case "kFold" => new kFold(numericData, k, spark).crossValidation()
+      case "kFold" => new kFold(numericData, k, spark).crossValidation()
       case _ =>
         throw new RuntimeException("'" + technique + "' - Not supported, yet.")
     }
@@ -79,9 +78,9 @@ object CrossValidation {
       text("The k value (used only for technique'kFold')")
 
     checkConfig(c =>
-      if (c.technique == "kFold" && c.k == 0) failure("Option --k-Fold must not be empty if technique 'kFold ' is set")
+      if (c.technique == "kFold" && c.k == 0) failure("Option --k-Fold must not be empty if technique 'kFold' is set")
      else success)
 
    help("help").text("prints this usage text")
  }
-}
+}

sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/mining/MineRules.scala

Lines changed: 9 additions & 8 deletions
@@ -1,10 +1,11 @@
 package net.sansa_stack.examples.spark.ml.mining
 
 import scala.collection.mutable
-import org.apache.spark.sql.SparkSession
+
+import net.sansa_stack.ml.spark.mining.amieSpark.{ DfLoader, RDFGraphLoader }
 import net.sansa_stack.ml.spark.mining.amieSpark.KBObject.KB
-import net.sansa_stack.ml.spark.mining.amieSpark.{ RDFGraphLoader, DfLoader }
 import net.sansa_stack.ml.spark.mining.amieSpark.MineRules.Algorithm
+import org.apache.spark.sql.SparkSession
 
 /*
  * Mine Rules
@@ -32,7 +33,7 @@ object MineRules {
       .master("local[*]")
       .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
       .getOrCreate()
-
+
     val hdfsPath = outputPath + "/"
 
     val know = new KB()
@@ -44,8 +45,8 @@ object MineRules {
 
     val algo = new Algorithm(know, 0.01, 3, 0.1, hdfsPath)
 
-    //var erg = algo.ruleMining(sparkSession.sparkContext, sparkSession.sqlContext)
-    //println(erg)
+    // var erg = algo.ruleMining(sparkSession.sparkContext, sparkSession.sqlContext)
+    // println(erg)
     var output = algo.ruleMining(spark.sparkContext, spark.sqlContext)
 
     var outString = output.map { x =>
@@ -55,7 +56,7 @@ object MineRules {
         if (i == 0) {
           temp = rdfTrp(i) + " <= "
         } else {
-          temp += rdfTrp(i) + " \u2227 "
+          temp += rdfTrp(i) + """ \u2227 """
         }
       }
       temp = temp.stripSuffix(" \u2227 ")
@@ -67,7 +68,7 @@ object MineRules {
   }
 
   case class Config(
-    in: String = "",
+    in: String = "",
     out: String = "")
 
   val parser = new scopt.OptionParser[Config]("Mines the Rules example") {
@@ -84,4 +85,4 @@ object MineRules {
 
    help("help").text("prints this usage text")
  }
-}
+}
sansa-examples-spark/src/main/scala/net/sansa_stack/examples/spark/ml/outliers/anomalydetection/AnomalyDetection.scala

Lines changed: 25 additions & 24 deletions
@@ -1,12 +1,13 @@
 package net.sansa_stack.examples.spark.ml.outliers.anomalydetection
 
 import scala.collection.mutable
-import org.apache.jena.riot.Lang
-import net.sansa_stack.rdf.spark.io._
-import org.apache.spark.sql.{ SparkSession, Dataset, Row, SaveMode }
-import org.apache.spark.storage.StorageLevel
+
 import net.sansa_stack.ml.spark.outliers.anomalydetection._
+import net.sansa_stack.rdf.spark.io._
+import org.apache.jena.riot.Lang
 import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{ Dataset, Row, SaveMode, SparkSession }
+import org.apache.spark.storage.StorageLevel
 
 object AnomalyDetection {
   def main(args: Array[String]) {
@@ -19,11 +20,11 @@ object AnomalyDetection {
   }
 
   def run(
-    input: String,
-    JSimThreshold: Double,
+    input: String,
+    JSimThreshold: Double,
     anomalyListLimit: Int,
-    numofpartition: Int,
-    output: String): Unit = {
+    numofpartition: Int,
+    output: String): Unit = {
 
     println("==================================================")
     println("| Distributed Anomaly Detection |")
@@ -35,24 +36,24 @@ object AnomalyDetection {
       .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
       .getOrCreate()
 
-    //N-Triples Reader
+    // N-Triples Reader
     val lang = Lang.NTRIPLES
     val triplesRDD = spark.rdf(lang)(input).repartition(numofpartition).persist()
 
     // predicated that are not interesting for evaluation
     val wikiList = List("wikiPageRevisionID,wikiPageID")
 
-    //filtering numeric literal having xsd type double,integer,nonNegativeInteger and squareKilometre
+    // filtering numeric literal having xsd type double,integer,nonNegativeInteger and squareKilometre
     val objList = List(
       "http://www.w3.org/2001/XMLSchema#double",
       "http://www.w3.org/2001/XMLSchema#integer",
       "http://www.w3.org/2001/XMLSchema#nonNegativeInteger",
       "http://dbpedia.org/datatype/squareKilometre")
 
-    //helful for considering only Dbpedia type as their will be yago type,wikidata type also
+    // helful for considering only Dbpedia type as their will be yago type,wikidata type also
     val triplesType = List("http://dbpedia.org/ontology")
 
-    //some of the supertype which are present for most of the subject
+    // some of the supertype which are present for most of the subject
     val listSuperType = List(
       "http://dbpedia.org/ontology/Activity", "http://dbpedia.org/ontology/Organisation",
       "http://dbpedia.org/ontology/Agent", "http://dbpedia.org/ontology/SportsLeague",
@@ -62,7 +63,7 @@ object AnomalyDetection {
       "http://dbpedia.org/ontology/Species", "http://dbpedia.org/ontology/Eukaryote",
       "http://dbpedia.org/ontology/Location")
 
-    //hypernym URI
+    // hypernym URI
     val hypernym = "http://purl.org/linguistics/gold/hypernym"
 
     var clusterOfSubject: RDD[(Set[(String, String, Object)])] = null
@@ -78,7 +79,7 @@ object AnomalyDetection {
 
     val test = setDataSize.map(f => outDetection.iqr2(f, anomalyListLimit))
 
-    val testfilter = test.filter(f => f.size > 0) //.distinct()
+    val testfilter = test.filter(f => f.size > 0) // .distinct()
     val testfilterDistinct = testfilter.flatMap(f => f)
     testfilterDistinct.saveAsTextFile(output)
     setData.unpersist()
@@ -87,11 +88,11 @@ object AnomalyDetection {
   }
 
   case class Config(
-    in: String = "",
-    threshold: Double = 0.0,
-    anomalyListLimit: Int = 0,
-    numofpartition: Int = 0,
-    out: String = "")
+    in: String = "",
+    threshold: Double = 0.0,
+    anomalyListLimit: Int = 0,
+    numofpartition: Int = 0,
+    out: String = "")
 
   val parser = new scopt.OptionParser[Config]("SANSA -Outlier Detection") {
 
@@ -101,26 +102,26 @@ object AnomalyDetection {
       action((x, c) => c.copy(in = x)).
       text("path to file that contains RDF data (in N-Triples format)")
 
-    //Jaccard similarity threshold value
+    // Jaccard similarity threshold value
     opt[Double]('t', "threshold").required().
       action((x, c) => c.copy(threshold = x)).
      text("the Jaccard Similarity value")
 
-    //number of partition
+    // number of partition
     opt[Int]('a', "numofpartition").required().
       action((x, c) => c.copy(numofpartition = x)).
      text("Number of partition")
 
-    //List limit for calculating IQR
+    // List limit for calculating IQR
     opt[Int]('c', "anomalyListLimit").required().
       action((x, c) => c.copy(anomalyListLimit = x)).
      text("the outlier List Limit")
 
-    //output file path
+    // output file path
     opt[String]('o', "output").required().valueName("<directory>").
       action((x, c) => c.copy(out = x)).
      text("the output directory")
 
    help("help").text("prints this usage text")
  }
-}
+}

0 commit comments
