File tree Expand file tree Collapse file tree 1 file changed +14
-0
lines changed
src/main/scala/ldbc/snb/datagen/factors Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Original file line number Diff line number Diff line change @@ -164,6 +164,20 @@ object FactorGenerationStage extends DatagenStage with Logging {
164
164
import model .raw ._
165
165
166
166
private val rawFactors = Map (
167
// Factor over comments and posts: combines both inputs into one message table
// (creationDay, deletionDay, MessageId) and returns a seeded random sample of it
// targeting roughly `sampleSize` rows.
" messageIds" -> Factor(CommentType, PostType) { case Seq(comments, posts) =>
  val messages =
    // NOTE(review): `|+|` is defined outside this view; presumably it unions the
    // two identically-shaped selections -- confirm against its definition.
    (comments.select($" creationDate", $" deletionDate", $" id".as(" MessageId"))
      |+| posts.select($" creationDate", $" deletionDate", $" id".as(" MessageId"))
    )
      .select(
        date_trunc(" day", $" creationDate").as(" creationDay"),
        date_trunc(" day", $" deletionDate").as(" deletionDay"),
        $" MessageId")
  val sampleSize = 20000
  val count = messages.count()
  // Divide as Double. `sampleSize / count` would be Int / Long integer division,
  // which truncates to 0 whenever count > sampleSize (yielding an empty sample)
  // and throws ArithmeticException when count == 0. With Double division an
  // empty input gives +Infinity, which Math.min clamps to 1.0.
  val sampleFraction = Math.min(sampleSize.toDouble / count, 1.0)
  // Fixed seed (42) so repeated runs draw the same sample.
  messages.sample(sampleFraction, 42)
},
167
181
" countryNumPersons" -> Factor (PlaceType , PersonType ) { case Seq (places, persons) =>
168
182
val cities = places.where($" type" === " City" ).cache()
169
183
val countries = places.where($" type" === " Country" ).cache()
You can’t perform that action at this time.
0 commit comments