@@ -5,6 +5,7 @@ import ldbc.snb.datagen.io.graphs.GraphSource
 import ldbc.snb.datagen.model
 import ldbc.snb.datagen.model.EntityType
 import ldbc.snb.datagen.syntax._
+import ldbc.snb.datagen.transformation.transform.ConvertDates
 import ldbc.snb.datagen.util.{DatagenStage, Logging}
 import org.apache.spark.sql.functions.{broadcast, count, date_trunc, sum}
 import org.apache.spark.sql.{Column, DataFrame, SparkSession}
@@ -15,15 +16,16 @@ case class Factor(requiredEntities: EntityType*)(f: Seq[DataFrame] => DataFrame)
 
 object FactorGenerationStage extends DatagenStage with Logging {
 
-  case class Args(outputDir: String = "out")
+  case class Args(outputDir: String = "out", irFormat: String = "parquet")
 
   def run(args: Args)(implicit spark: SparkSession): Unit = {
     import ldbc.snb.datagen.factors.io.instances._
     import ldbc.snb.datagen.io.Reader.ops._
     import ldbc.snb.datagen.io.Writer.ops._
     import ldbc.snb.datagen.io.instances._
 
-    GraphSource(model.graphs.Raw.graphDef, args.outputDir, "csv").read
+    GraphSource(model.graphs.Raw.graphDef, args.outputDir, args.irFormat).read
+      .pipe(ConvertDates.transform)
       .pipe(g =>
         rawFactors.map { case (name, calc) =>
           val resolvedEntities = calc.requiredEntities.foldLeft(Seq.empty[DataFrame])((args, et) => args :+ g.entities(et))
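
A minimal sketch of driving the stage with the new option. The Args fields and the run signature are taken from the diff above; the SparkSession setup and the FactorsExample wrapper are assumptions for illustration only, not the project's real entry point:

import org.apache.spark.sql.SparkSession

object FactorsExample {
  def main(cliArgs: Array[String]): Unit = {
    // Assumed local session; the real datagen driver wires this up elsewhere.
    implicit val spark: SparkSession =
      SparkSession.builder.master("local[*]").appName("factors-example").getOrCreate()

    // irFormat now defaults to "parquet"; pass "csv" to keep the previous behavior.
    FactorGenerationStage.run(
      FactorGenerationStage.Args(outputDir = "out", irFormat = "csv")
    )
  }
}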