Skip to content

Commit 652cc55

Browse files
committed
remove Interactive object
1 parent ff06ad4 commit 652cc55

File tree

4 files changed

+43 −48 lines changed

4 files changed

+43 −48 lines changed

src/main/scala/ldbc/snb/datagen/transformation/transform/Interactive.scala

Lines changed: 0 additions & 41 deletions
This file was deleted.

src/main/scala/ldbc/snb/datagen/transformation/transform/IrToRawTransform.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import ldbc.snb.datagen.model.{EntityType, Mode}
44
import ldbc.snb.datagen.util.sql.qcol
55
import org.apache.spark.sql.DataFrame
66
import org.apache.spark.sql.functions.lit
7-
import org.apache.spark.sql.types.{DateType, LongType, TimestampType}
7+
import org.apache.spark.sql.types.{DateType, TimestampType}
88
import shapeless._
99

1010
object IrToRawTransform extends Transform[Mode.Raw.type, Mode.Raw.type] {

src/main/scala/ldbc/snb/datagen/transformation/transform/RawToBiTransform.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ case class RawToBiTransform(mode: BI, simulationStart: Long, simulationEnd: Long
1313
with Logging {
1414
log.debug(s"BI Transformation parameters: $mode")
1515

16-
val bulkLoadThreshold = Interactive.calculateBulkLoadThreshold(mode.bulkloadPortion, simulationStart, simulationEnd)
16+
val bulkLoadThreshold = RawToInteractiveTransform.calculateBulkLoadThreshold(mode.bulkloadPortion, simulationStart, simulationEnd)
1717

1818
def batchPeriodFormat(batchPeriod: String) = batchPeriod match {
1919
case "year" => "yyyy"
@@ -45,7 +45,7 @@ case class RawToBiTransform(mode: BI, simulationStart: Long, simulationEnd: Long
4545
.filter(inBatch($"creationDate", batchStart, batchEnd))
4646
.pipe(batched)
4747
.select(
48-
Seq($"insert_batch_id".as("batch_id")) ++ Interactive.columns(tpe, df.columns).map(qcol): _*
48+
Seq($"insert_batch_id".as("batch_id")) ++ RawToInteractiveTransform.columns(tpe, df.columns).map(qcol): _*
4949
)
5050
.repartitionByRange($"batch_id")
5151
.sortWithinPartitions($"creationDate")
@@ -70,7 +70,7 @@ case class RawToBiTransform(mode: BI, simulationStart: Long, simulationEnd: Long
7070
case (tpe, v) if tpe.isStatic => tpe -> BatchedEntity(v, None, None)
7171
case (tpe, v) =>
7272
tpe -> BatchedEntity(
73-
Interactive.snapshotPart(tpe, v, bulkLoadThreshold, filterDeletion = false),
73+
RawToInteractiveTransform.snapshotPart(tpe, v, bulkLoadThreshold, filterDeletion = false),
7474
Some(Batched(insertBatchPart(tpe, v, bulkLoadThreshold, simulationEnd), Seq("batch_id"))),
7575
if (keepImplicitDeletes || v.columns.contains("explicitlyDeleted"))
7676
Some(Batched(deleteBatchPart(tpe, v, bulkLoadThreshold, simulationEnd), Seq("batch_id")))
Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,60 @@
11
package ldbc.snb.datagen.transformation.transform
22

3-
import ldbc.snb.datagen.model.{Graph, Mode}
3+
import ldbc.snb.datagen.model.Cardinality.NN
4+
import ldbc.snb.datagen.model.EntityType.Edge
5+
import ldbc.snb.datagen.model.{EntityType, Graph, Mode}
46
import ldbc.snb.datagen.util.Logging
7+
import ldbc.snb.datagen.util.sql._
8+
import ldbc.snb.datagen.syntax._
59
import org.apache.spark.sql.DataFrame
10+
import org.apache.spark.sql.functions.{col, lit, to_timestamp}
611

712
case class RawToInteractiveTransform(mode: Mode.Interactive, simulationStart: Long, simulationEnd: Long)
813
extends Transform[Mode.Raw.type, Mode.Interactive]
914
with Logging {
1015
log.debug(s"Interactive Transformation parameters: $mode")
1116

12-
val bulkLoadThreshold = Interactive.calculateBulkLoadThreshold(mode.bulkLoadPortion, simulationStart, simulationEnd)
17+
val bulkLoadThreshold = RawToInteractiveTransform.calculateBulkLoadThreshold(mode.bulkLoadPortion, simulationStart, simulationEnd)
1318

1419
override def transform(input: In): Out = {
1520
val entities = input.entities
1621
.map { case (tpe, v) =>
1722
tpe -> IrToRawTransform.convertDates(tpe, v)
1823
}
1924
.map { case (tpe, v) =>
20-
tpe -> Interactive.snapshotPart(tpe, v, bulkLoadThreshold, filterDeletion = true)
25+
tpe -> RawToInteractiveTransform.snapshotPart(tpe, v, bulkLoadThreshold, filterDeletion = true)
2126
}
2227
Graph[Mode.Interactive](isAttrExploded = input.isAttrExploded, isEdgesExploded = input.isEdgesExploded, mode, entities)
2328
}
2429
}
30+
31+
object RawToInteractiveTransform {
32+
33+
def columns(tpe: EntityType, cols: Seq[String]) = tpe match {
34+
case tpe if tpe.isStatic => cols
35+
case Edge("Knows", "Person", "Person", NN, false) =>
36+
val rawCols = Set("deletionDate", "explicitlyDeleted", "weight")
37+
cols.filter(!rawCols.contains(_))
38+
case _ =>
39+
val rawCols = Set("deletionDate", "explicitlyDeleted")
40+
cols.filter(!rawCols.contains(_))
41+
}
42+
43+
def calculateBulkLoadThreshold(bulkLoadPortion: Double, simulationStart: Long, simulationEnd: Long) = {
44+
(simulationEnd - ((simulationEnd - simulationStart) * (1 - bulkLoadPortion)).toLong)
45+
}
46+
47+
def snapshotPart(tpe: EntityType, df: DataFrame, bulkLoadThreshold: Long, filterDeletion: Boolean) = {
48+
val filterBulkLoad = (ds: DataFrame) =>
49+
ds
50+
.filter(
51+
$"creationDate" < to_timestamp(lit(bulkLoadThreshold / 1000)) &&
52+
(!lit(filterDeletion) || $"deletionDate" >= to_timestamp(lit(bulkLoadThreshold / 1000)))
53+
)
54+
55+
tpe match {
56+
case tpe if tpe.isStatic => df
57+
case tpe => filterBulkLoad(df).select(columns(tpe, df.columns).map(name => col(qualified(name))): _*)
58+
}
59+
}
60+
}

0 commit comments

Comments (0)