@@ -301,13 +301,6 @@ object FactorGenerationStage extends DatagenStage with Logging {
301
301
.join(undirectedKnows(personKnowsPerson).as(" knows" ), $" Person1.id" === $" knows.Person1Id" , " leftouter" )
302
302
frequency(knows, value = $" knows.Person2Id" , by = Seq ($" Person1.id" , $" Person1.creationDate" , $" Person1.deletionDate" ))
303
303
},
304
- " personNumFriendsOfFriends" -> Factor (PersonKnowsPersonType , PersonType ) { case Seq (personKnowsPerson, person1) =>
305
- val foaf = person1
306
- .as(" Person1" )
307
- .join(undirectedKnows(personKnowsPerson).as(" knows1" ), $" Person1.id" === $" knows1.Person1Id" , " leftouter" )
308
- .join(undirectedKnows(personKnowsPerson).as(" knows2" ), $" knows1.Person2Id" === $" knows2.Person1Id" , " leftouter" )
309
- frequency(foaf, value = $" knows2.Person2Id" , by = Seq ($" Person1.id" , $" Person1.creationDate" , $" Person1.deletionDate" ))
310
- },
// Factor "languageNumPosts": passes the posts whose `language` is non-null to `frequency`,
// using the post `id` as the value column and `language` as the grouping column.
// Presumably this yields a post count per language -- confirm against `frequency`.
"languageNumPosts" -> Factor(PostType) { case Seq(post) =>
  val postsWithLanguage = post.where($"language".isNotNull)
  frequency(postsWithLanguage, value = $"id", by = Seq($"language"))
},
@@ -462,12 +455,134 @@ object FactorGenerationStage extends DatagenStage with Logging {
462
455
.where($" creationDate" < $" deletionDate" )
463
456
.coalesce(size)
464
457
},
458
+ // -- interactive --
459
+ // first names
// Factor "personFirstNames": passes the person table to `frequency` with `id` as the
// value column, grouped by `firstName`.
// Presumably this yields the number of persons per first name -- confirm against `frequency`.
// The trailing comma is required because further factor entries follow this one.
"personFirstNames" -> Factor(PersonType) { case Seq(person) =>
  frequency(person, value = $"id", by = Seq($"firstName"))
},
467
+ // friends
// Factor "personNumFriendsOfFriends": left-outer joins each person to the `undirectedKnows`
// relation twice (person -> knows1 -> knows2) and passes the two-hop endpoint
// `knows2.Person2Id` to `frequency`, grouped by the person's id, creationDate and deletionDate.
// The left-outer joins keep persons that have no knows edges in the input.
// NOTE(review): the two-hop endpoints are not filtered here, so the start person and
// direct friends can appear among them -- confirm this is intended.
"personNumFriendsOfFriends" -> Factor(PersonKnowsPersonType, PersonType) { case Seq(personKnowsPerson, person1) =>
  val foaf = person1
    .as("Person1")
    .join(undirectedKnows(personKnowsPerson).as("knows1"), $"Person1.id" === $"knows1.Person1Id", "leftouter")
    .join(undirectedKnows(personKnowsPerson).as("knows2"), $"knows1.Person2Id" === $"knows2.Person1Id", "leftouter")
  frequency(
    foaf,
    value = $"knows2.Person2Id",
    by = Seq($"Person1.id", $"Person1.creationDate", $"Person1.deletionDate")
  )
},
475
+ // posts
// Factor "personNumPosts": left-outer joins each person to the posts whose
// `CreatorPersonId` equals the person's id and passes `post.id` to `frequency`,
// grouped by `Person.id`. Persons without posts are kept by the left-outer join.
"personNumPosts" -> Factor(PersonType, PostType) { case Seq(person, post) =>
  val posts = person
    .as("Person")
    .join(post.as("post"), $"post.CreatorPersonId" === $"Person.id", "leftouter")
  frequency(posts, value = $"post.id", by = Seq($"Person.id"))
},
// Factor "personNumFriendPosts": left-outer joins each person to `undirectedKnows`
// (alias knows1), then to the posts created by the friend (`knows1.Person2Id`), and
// passes `post.id` to `frequency`, grouped by `Person.id`.
"personNumFriendPosts" -> Factor(PersonType, PersonKnowsPersonType, PostType) { case Seq(person, personKnowsPerson, post) =>
  val posts = person
    .as("Person")
    .join(undirectedKnows(personKnowsPerson).as("knows1"), $"Person.id" === $"knows1.Person1Id", "leftouter")
    .join(post.as("post"), $"post.CreatorPersonId" === $"knows1.Person2Id", "leftouter")
  frequency(posts, value = $"post.id", by = Seq($"Person.id"))
},
// Factor "personNumFriendOfFriendPosts": left-outer joins each person to `undirectedKnows`
// twice (person -> knows1 -> knows2), then to the posts created by the two-hop endpoint,
// and passes `post.id` to `frequency`, grouped by `Person.id`.
"personNumFriendOfFriendPosts" -> Factor(PersonType, PersonKnowsPersonType, PostType) { case Seq(person, personKnowsPerson, post) =>
  val posts = person
    .as("Person")
    .join(undirectedKnows(personKnowsPerson).as("knows1"), $"Person.id" === $"knows1.Person1Id", "leftouter")
    .join(undirectedKnows(personKnowsPerson).as("knows2"), $"knows1.Person2Id" === $"knows2.Person1Id", "leftouter")
    // Posts must be matched against the friend-of-friend (knows2.Person2Id). Matching against
    // knows1.Person2Id would attach the direct friend's posts to every second-hop row instead.
    .join(post.as("post"), $"post.CreatorPersonId" === $"knows2.Person2Id", "leftouter")
  frequency(posts, value = $"post.id", by = Seq($"Person.id"))
},
497
+ // comments
// Factor "personNumComments": left-outer joins each person to the comments whose
// `CreatorPersonId` equals the person's id and passes `comment.id` to `frequency`,
// grouped by `Person.id`. Persons without comments are kept by the left-outer join.
// The second source is CommentType (as in "personNumFriendComments"); requesting
// PostType here would bind the post table to `comment` and count posts instead.
"personNumComments" -> Factor(PersonType, CommentType) { case Seq(person, comment) =>
  val comments = person
    .as("Person")
    .join(comment.as("comment"), $"comment.CreatorPersonId" === $"Person.id", "leftouter")
  frequency(comments, value = $"comment.id", by = Seq($"Person.id"))
},
// Factor "personNumFriendComments": left-outer joins each person to `undirectedKnows`
// (alias knows1), then to the comments created by the friend (`knows1.Person2Id`), and
// passes `comment.id` to `frequency`, grouped by `Person.id`.
"personNumFriendComments" -> Factor(PersonType, PersonKnowsPersonType, CommentType) { case Seq(person, personKnowsPerson, comment) =>
  val comments = person
    .as("Person")
    .join(undirectedKnows(personKnowsPerson).as("knows1"), $"Person.id" === $"knows1.Person1Id", "leftouter")
    .join(comment.as("comment"), $"comment.CreatorPersonId" === $"knows1.Person2Id", "leftouter")
  frequency(comments, value = $"comment.id", by = Seq($"Person.id"))
},
511
+ // likes
// Factor "personLikesNumMessages": projects comment-likes and post-likes onto a common
// (PersonId, MessageId) shape, combines them with `|+|`, left-outer joins the result to
// each person and passes `MessageId` to `frequency`, grouped by `Person.id`.
// NOTE(review): `|+|` is defined outside this view; presumably it unions the two
// DataFrames -- confirm.
"personLikesNumMessages" -> Factor(PersonType, PersonLikesCommentType, PersonLikesPostType) { case Seq(person, personLikesComment, personLikesPost) =>
  val likedComments = personLikesComment.select($"PersonId", $"CommentId".as("MessageId"))
  val likedPosts = personLikesPost.select($"PersonId", $"PostId".as("MessageId"))
  val personLikesMessage = likedComments |+| likedPosts
  val messages = person
    .as("Person")
    .join(personLikesMessage.as("personLikesMessage"), $"personLikesMessage.PersonId" === $"Person.id", "leftouter")
  frequency(messages, value = $"personLikesMessage.MessageId", by = Seq($"Person.id"))
},
521
+ // tags
// Factor "personNumTags": passes the person-has-interest relation to `frequency` with
// `interestId` as the value column, grouped by `personId`.
// Only persons that appear in the relation are present in the input (no join to Person).
"personNumTags" -> Factor(PersonHasInterestTagType) { case Seq(interest) =>
  frequency(interest, value = $"interestId", by = Seq($"personId"))
},
// Factor "personNumFriendTags": starts from `undirectedKnows` (alias knows), left-outer
// joins the interests of the friend (`knows.Person2Id`) and passes `interestId` to
// `frequency`, grouped by `knows.Person1Id`.
// Only persons with at least one knows edge appear in the input (no join to Person).
"personNumFriendTags" -> Factor(PersonHasInterestTagType, PersonKnowsPersonType) { case Seq(interest, personKnowsPerson) =>
  val friendInterests = undirectedKnows(personKnowsPerson)
    .as("knows")
    .join(interest, $"personId" === $"knows.Person2Id", "leftouter")
  frequency(friendInterests, value = $"interestId", by = Seq($"knows.Person1Id"))
},
537
+ // forums
// Factor "personNumForums": passes the forum-has-member relation to `frequency` with
// `ForumId` as the value column, grouped by `PersonId`.
// Only persons that appear in the relation are present in the input (no join to Person).
"personNumForums" -> Factor(ForumHasMemberType) { case Seq(hasMember) =>
  frequency(hasMember, value = $"ForumId", by = Seq($"PersonId"))
},
// Factor "personNumFriendForums": starts from `undirectedKnows` (alias knows), left-outer
// joins the forum memberships of the friend (`knows.Person2Id`) and passes `ForumId` to
// `frequency`, grouped by `knows.Person1Id`.
"personNumFriendForums" -> Factor(ForumHasMemberType, PersonKnowsPersonType) { case Seq(hasMember, personKnowsPerson) =>
  val friendMemberships = undirectedKnows(personKnowsPerson)
    .as("knows")
    .join(hasMember, $"PersonId" === $"knows.Person2Id", "leftouter")
  frequency(friendMemberships, value = $"ForumId", by = Seq($"knows.Person1Id"))
},
// Factor "personNumFriendOfFriendForums": chains `undirectedKnows` twice
// (knows1 -> knows2), left-outer joins the forum memberships of the two-hop endpoint
// (`knows2.Person2Id`) and passes `ForumId` to `frequency`, grouped by `knows1.Person1Id`.
"personNumFriendOfFriendForums" -> Factor(ForumHasMemberType, PersonKnowsPersonType) { case Seq(hasMember, personKnowsPerson) =>
  val foafMemberships = undirectedKnows(personKnowsPerson)
    .as("knows1")
    .join(undirectedKnows(personKnowsPerson).as("knows2"), $"knows1.Person2Id" === $"knows2.Person1Id", "leftouter")
    .join(hasMember, $"PersonId" === $"knows2.Person2Id", "leftouter")
  frequency(foafMemberships, value = $"ForumId", by = Seq($"knows1.Person1Id"))
},
562
+ // companies
// Factor "personNumCompanies": passes the person-work-at-company relation to `frequency`
// with `CompanyId` as the value column, grouped by `PersonId`.
// Only persons that appear in the relation are present in the input (no join to Person).
"personNumCompanies" -> Factor(PersonWorkAtCompanyType) { case Seq(workAt) =>
  frequency(workAt, value = $"CompanyId", by = Seq($"PersonId"))
},
// Factor "personNumFriendCompanies": starts from `undirectedKnows` (alias knows),
// left-outer joins the work-at rows of the friend (`knows.Person2Id`) and passes
// `CompanyId` to `frequency`, grouped by `knows.Person1Id`.
"personNumFriendCompanies" -> Factor(PersonWorkAtCompanyType, PersonKnowsPersonType) { case Seq(workAt, personKnowsPerson) =>
  val friendEmployers = undirectedKnows(personKnowsPerson)
    .as("knows")
    .join(workAt, $"PersonId" === $"knows.Person2Id", "leftouter")
  frequency(friendEmployers, value = $"CompanyId", by = Seq($"knows.Person1Id"))
},
// Factor "personNumFriendOfFriendCompanies": chains `undirectedKnows` twice
// (knows1 -> knows2), left-outer joins the work-at rows of the two-hop endpoint
// (`knows2.Person2Id`) and passes `CompanyId` to `frequency`, grouped by `knows1.Person1Id`.
"personNumFriendOfFriendCompanies" -> Factor(PersonWorkAtCompanyType, PersonKnowsPersonType) { case Seq(workAt, personKnowsPerson) =>
  val foafEmployers = undirectedKnows(personKnowsPerson)
    .as("knows1")
    .join(undirectedKnows(personKnowsPerson).as("knows2"), $"knows1.Person2Id" === $"knows2.Person1Id", "leftouter")
    .join(workAt, $"PersonId" === $"knows2.Person2Id", "leftouter")
  frequency(foafEmployers, value = $"CompanyId", by = Seq($"knows1.Person1Id"))
},
472
587
)
473
588
}
0 commit comments