Skip to content

Commit 6cc2142

Browse files
committed
make oversize-factor configurable
1 parent c658637 commit 6cc2142

File tree

2 files changed

+15
-6
lines changed

2 files changed

+15
-6
lines changed

src/main/scala/ldbc/snb/datagen/generation/GenerationStage.scala

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,15 @@ object GenerationStage extends DatagenStage with Logging {
1818
numThreads: Option[Int] = None,
1919
params: Map[String, String] = Map.empty,
2020
paramFile: Option[String] = None,
21-
outputDir: String = "out"
21+
outputDir: String = "out",
22+
oversizeFactor: Option[Double] = None
2223
)
2324

24-
def run(config: GeneratorConfiguration)(implicit spark: SparkSession) = {
25+
def run(args: Args, config: GeneratorConfiguration)(implicit spark: SparkSession) = {
2526
val numPartitions = config.getInt("hadoop.numThreads", spark.sparkContext.defaultParallelism)
2627
val idealPartitions = DatagenParams.numPersons.toDouble / optimalPersonsPerFile
2728

28-
val oversizeFactor = Math.max(numPartitions / idealPartitions, 1.0)
29+
val oversizeFactor = args.oversizeFactor.getOrElse(Math.max(numPartitions / idealPartitions, 1.0))
2930

3031
val persons = SparkPersonGenerator(config)
3132

src/main/scala/ldbc/snb/datagen/spark/LdbcDatagen.scala

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,8 @@ object LdbcDatagen extends SparkApp {
2727
numThreads: Option[Int] = None,
2828
format: String = "csv",
2929
generateFactors: Boolean = false,
30-
formatOptions: Map[String, String] = Map.empty
30+
formatOptions: Map[String, String] = Map.empty,
31+
oversizeFactor: Option[Double] = None
3132
)
3233

3334
def main(args: Array[String]): Unit = {
@@ -65,6 +66,12 @@ object LdbcDatagen extends SparkApp {
6566
.action((x, c) => args.mode.set(c)(x))
6667
.text("Generation mode. Options: raw, bi, interactive. Default: raw")
6768

69+
opt[Double]("oversize-factor")
70+
.action((x, c) => args.oversizeFactor.set(c)(Some(x)))
71+
.text("Controls size of files relative to Persons. " +
72+
"Values larger than 1 will result in less but larger files. " +
73+
"Smaller values result in more, smaller files")
74+
6875
opt[Double]("bulkload-portion")
6976
.action((x, c) => args.bulkloadPortion.set(c)(x))
7077
.text("Bulkload portion. Only applicable to BI and interactive modes")
@@ -113,14 +120,15 @@ object LdbcDatagen extends SparkApp {
113120
params = args.params,
114121
paramFile = args.paramFile,
115122
outputDir = args.outputDir,
116-
numThreads = args.numThreads
123+
numThreads = args.numThreads,
124+
oversizeFactor = args.oversizeFactor
117125
)
118126

119127
val generatorConfig = GenerationStage.buildConfig(generatorArgs)
120128

121129
DatagenContext.initialize(generatorConfig)
122130

123-
GenerationStage.run(generatorConfig)
131+
GenerationStage.run(generatorArgs, generatorConfig)
124132

125133
if (args.generateFactors) {
126134
val factorArgs = FactorGenerationStage.Args()

0 commit comments

Comments
 (0)