Skip to content

Commit 080535e

Browse files
committed
Factorgen: Add message sample
1 parent 2bc2194 commit 080535e

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

src/main/scala/ldbc/snb/datagen/factors/FactorGenerationStage.scala

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,20 @@ object FactorGenerationStage extends DatagenStage with Logging {
164164
import model.raw._
165165

166166
private val rawFactors = Map(
167+
"messageIds" -> Factor(CommentType, PostType) { case Seq(comments, posts) =>
168+
val messages =
169+
(comments.select($"creationDate", $"deletionDate", $"id".as("MessageId"))
170+
|+| posts.select($"creationDate", $"deletionDate", $"id".as("MessageId"))
171+
)
172+
.select(
173+
date_trunc("day", $"creationDate").as("creationDay"),
174+
date_trunc("day", $"deletionDate").as("deletionDay"),
175+
$"MessageId")
176+
val sampleSize = 20000
177+
val count = messages.count()
178+
// Fraction of rows to keep so that roughly `sampleSize` messages survive, capped at 1.0.
// Convert to Double before dividing: `sampleSize / count` is Int / Long, i.e. integer
// division, which truncates to 0 whenever count > sampleSize (producing an empty sample)
// and throws ArithmeticException when count == 0. With floating-point division an empty
// input yields Infinity, which the cap turns into 1.0.
val sampleFraction = Math.min(sampleSize.toDouble / count, 1.0)
179+
messages.sample(sampleFraction, 42)
180+
},
167181
"countryNumPersons" -> Factor(PlaceType, PersonType) { case Seq(places, persons) =>
168182
val cities = places.where($"type" === "City").cache()
169183
val countries = places.where($"type" === "Country").cache()

0 commit comments

Comments
 (0)