Skip to content

Commit 3ed8b53

Browse files
committed
smaller refactors
1 parent 29af359 commit 3ed8b53

24 files changed

+69
-118
lines changed

src/main/scala/ldbc/snb/datagen/LdbcDatagen.scala

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@ import ldbc.snb.datagen.transformation.TransformationStage
88
import ldbc.snb.datagen.util.{SparkApp, lower}
99
import shapeless.lens
1010

11+
import scala.collection.JavaConverters._
12+
1113
object LdbcDatagen extends SparkApp {
1214
val appName = "LDBC SNB Datagen for Spark"
1315

@@ -123,10 +125,8 @@ object LdbcDatagen extends SparkApp {
123125

124126
def run(args: Args): Unit = {
125127

126-
val irFormat = {
127-
val _f = System.getenv("LDBC_DATAGEN_IR_FORMAT")
128-
if (_f == null || _f == "") "parquet" else _f
129-
}
128+
val env = System.getenv().asScala
129+
val irFormat = env.getOrElse("LDBC_DATAGEN_IR_FORMAT", "parquet")
130130

131131
val generatorArgs = GenerationStage.Args(
132132
scaleFactor = args.scaleFactor,

src/main/scala/ldbc/snb/datagen/factors/FactorGenerationStage.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ package ldbc.snb.datagen.factors
22

33
import ldbc.snb.datagen.factors.io.FactorTableSink
44
import ldbc.snb.datagen.io.graphs.GraphSource
5-
import ldbc.snb.datagen.model.EntityType
65
import ldbc.snb.datagen.model
6+
import ldbc.snb.datagen.model.EntityType
77
import ldbc.snb.datagen.syntax._
88
import ldbc.snb.datagen.util.{DatagenStage, Logging}
99
import org.apache.spark.sql.functions.{broadcast, count, date_trunc, sum}

src/main/scala/ldbc/snb/datagen/factors/io/package.scala

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,16 @@
11
package ldbc.snb.datagen.factors
22

3+
import ldbc.snb.datagen.io.dataframes.DataFrameSink
34
import ldbc.snb.datagen.io.{PathComponent, Writer}
4-
import ldbc.snb.datagen.util.Logging
55
import ldbc.snb.datagen.model.{GraphLike, Mode}
66
import ldbc.snb.datagen.syntax._
7-
import ldbc.snb.datagen.io.dataframes.DataFrameSink
7+
import ldbc.snb.datagen.util.Logging
88

99
package object io {
1010
case class FactorTableSink(path: String, format: String = "csv")
1111

12-
import ldbc.snb.datagen.io.dataframes.instances._
1312
import ldbc.snb.datagen.io.Writer.ops._
13+
import ldbc.snb.datagen.io.dataframes.instances._
1414

1515
private final class FactorTableWriter[M <: Mode] extends Writer[FactorTableSink] with Logging {
1616
override type Data = FactorTable[M]

src/main/scala/ldbc/snb/datagen/generator/GenerationStage.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ import ldbc.snb.datagen.generator.generators.{SparkKnowsGenerator, SparkKnowsMer
44
import ldbc.snb.datagen.generator.serializers.RawSerializer
55
import ldbc.snb.datagen.io.raw.{Csv, Parquet, RawSink}
66
import ldbc.snb.datagen.syntax._
7-
import ldbc.snb.datagen.util.{ConfigParser, DatagenStage, GeneratorConfiguration, Logging, SparkUI, simpleNameOf}
7+
import ldbc.snb.datagen.util._
88
import org.apache.hadoop.fs.{FileSystem, Path}
99
import org.apache.spark.sql.SparkSession
1010

src/main/scala/ldbc/snb/datagen/generator/generators/SparkKnowsGenerator.scala

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,15 @@
11
package ldbc.snb.datagen.generator.generators
22

33
import ldbc.snb.datagen.entities.dynamic.person.Person
4-
5-
import java.util
6-
import ldbc.snb.datagen.generator.{DatagenContext, DatagenParams}
74
import ldbc.snb.datagen.generator.generators.knowsgenerators.KnowsGenerator
5+
import ldbc.snb.datagen.generator.{DatagenContext, DatagenParams}
86
import ldbc.snb.datagen.util.GeneratorConfiguration
97
import org.apache.spark.rdd.RDD
108
import org.apache.spark.sql.SparkSession
119

10+
import java.util
1211
import scala.collection.JavaConverters._
1312
import scala.collection.SortedMap
14-
import scala.reflect.ClassTag
1513

1614
object SparkKnowsGenerator {
1715
def apply(

src/main/scala/ldbc/snb/datagen/generator/generators/SparkPersonGenerator.scala

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,10 @@
11
package ldbc.snb.datagen.generator.generators
22

33
import ldbc.snb.datagen.entities.dynamic.person.Person
4-
import org.apache.spark.sql.SparkSession
54
import ldbc.snb.datagen.generator.{DatagenContext, DatagenParams}
6-
import ldbc.snb.datagen.generator.generators.PersonGenerator
75
import ldbc.snb.datagen.util.GeneratorConfiguration
86
import org.apache.spark.rdd.RDD
7+
import org.apache.spark.sql.SparkSession
98

109
import scala.collection.JavaConverters._
1110

src/main/scala/ldbc/snb/datagen/generator/serializers/ActivityOutputStream.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
package ldbc.snb.datagen.generator.serializers
22

3-
import ldbc.snb.datagen.generator.dictionary.Dictionaries
43
import ldbc.snb.datagen.entities.dynamic.Forum
54
import ldbc.snb.datagen.entities.dynamic.messages.{Comment, Photo, Post}
65
import ldbc.snb.datagen.entities.dynamic.relations.{ForumMembership, Like}
6+
import ldbc.snb.datagen.generator.dictionary.Dictionaries
77
import ldbc.snb.datagen.generator.generators.{GenActivity, GenWall}
88
import ldbc.snb.datagen.io.raw.RecordOutputStream
99
import ldbc.snb.datagen.model.raw

src/main/scala/ldbc/snb/datagen/generator/serializers/PersonOutputStream.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
package ldbc.snb.datagen.generator.serializers
22

3-
import ldbc.snb.datagen.generator.dictionary.Dictionaries
43
import ldbc.snb.datagen.entities.dynamic.person.Person
4+
import ldbc.snb.datagen.generator.dictionary.Dictionaries
55
import ldbc.snb.datagen.io.raw.RecordOutputStream
66
import ldbc.snb.datagen.model.raw
77

src/main/scala/ldbc/snb/datagen/generator/serializers/RawSerializer.scala

Lines changed: 2 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,42 +1,25 @@
11
package ldbc.snb.datagen.generator.serializers
22

33
import ldbc.snb.datagen.entities.dynamic.person.Person
4+
import ldbc.snb.datagen.generator.generators.{GenActivity, PersonActivityGenerator, SparkRanker}
45
import ldbc.snb.datagen.generator.{DatagenContext, DatagenParams}
5-
import ldbc.snb.datagen.generator.generators.SparkRanker
6-
import ldbc.snb.datagen.generator.generators.{GenActivity, PersonActivityGenerator}
76
import ldbc.snb.datagen.io.Writer
87
import ldbc.snb.datagen.io.raw.csv.CsvRowEncoder
98
import ldbc.snb.datagen.io.raw.parquet.ParquetRowEncoder
109
import ldbc.snb.datagen.io.raw.{RawSink, WriteContext, createNewWriteContext, recordOutputStream}
11-
import ldbc.snb.datagen.model.raw.{
12-
Comment,
13-
CommentHasTag,
14-
Forum,
15-
ForumHasMember,
16-
ForumHasTag,
17-
Organisation,
18-
PersonLikesComment,
19-
PersonLikesPost,
20-
Place,
21-
Post,
22-
PostHasTag,
23-
Tag,
24-
TagClass
25-
}
10+
import ldbc.snb.datagen.model.raw._
2611
import ldbc.snb.datagen.model.{EntityTraits, raw}
2712
import ldbc.snb.datagen.syntax._
2813
import ldbc.snb.datagen.util.SerializableConfiguration
2914
import org.apache.hadoop.conf.Configuration
3015
import org.apache.hadoop.fs.{FileSystem, Path}
3116
import org.apache.spark.rdd.RDD
3217
import org.apache.spark.sql.SparkSession
33-
import org.apache.spark.sql.internal.SQLConf
3418

3519
import java.net.URI
3620
import java.util
3721
import java.util.Collections
3822
import java.util.function.Consumer
39-
import scala.collection.JavaConverters._
4023

4124
class RawSerializer(ranker: SparkRanker)(implicit spark: SparkSession) extends Writer[RawSink] {
4225
override type Data = RDD[Person]

src/main/scala/ldbc/snb/datagen/generator/serializers/StaticOutputStream.scala

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
11
package ldbc.snb.datagen.generator.serializers
22

3-
import ldbc.snb.datagen.generator.dictionary.Dictionaries
4-
import ldbc.snb.datagen.entities.statictype.place.Place
53
import ldbc.snb.datagen.entities.statictype.Organisation
4+
import ldbc.snb.datagen.entities.statictype.place.Place
5+
import ldbc.snb.datagen.generator.dictionary.Dictionaries
6+
import ldbc.snb.datagen.generator.vocabulary.{DBP, DBPOWL}
67
import ldbc.snb.datagen.io.raw.RecordOutputStream
78
import ldbc.snb.datagen.model.raw
89
import ldbc.snb.datagen.util.StringUtils
9-
import ldbc.snb.datagen.generator.vocabulary.{DBP, DBPOWL}
1010

1111
import scala.collection.JavaConverters._
1212
import scala.collection.mutable

0 commit comments

Comments
 (0)