@@ -303,13 +303,6 @@ object FactorGenerationStage extends DatagenStage with Logging {
303
303
.join(undirectedKnows(personKnowsPerson).as(" knows" ), $" Person1.id" === $" knows.Person1Id" , " leftouter" )
304
304
frequency(knows, value = $" knows.Person2Id" , by = Seq ($" Person1.id" , $" Person1.creationDate" , $" Person1.deletionDate" ))
305
305
},
306
- " personNumFriendsOfFriends" -> Factor (PersonKnowsPersonType , PersonType ) { case Seq (personKnowsPerson, person1) =>
307
- val foaf = person1
308
- .as(" Person1" )
309
- .join(undirectedKnows(personKnowsPerson).as(" knows1" ), $" Person1.id" === $" knows1.Person1Id" , " leftouter" )
310
- .join(undirectedKnows(personKnowsPerson).as(" knows2" ), $" knows1.Person2Id" === $" knows2.Person1Id" , " leftouter" )
311
- frequency(foaf, value = $" knows2.Person2Id" , by = Seq ($" Person1.id" , $" Person1.creationDate" , $" Person1.deletionDate" ))
312
- },
// Factor "languageNumPosts": aggregates post ids per post language.
// `frequency` is defined outside this excerpt; presumably it groups by `by` and
// aggregates `value` into a `frequency` column — TODO confirm against its definition.
"languageNumPosts" -> Factor(PostType) { case Seq(post) =>
  // Posts without a language are dropped before aggregating.
  frequency(post.where($"language".isNotNull), value = $"id", by = Seq($"language"))
},
@@ -468,12 +461,196 @@ object FactorGenerationStage extends DatagenStage with Logging {
468
461
.where($" creationDate" < $" deletionDate" )
469
462
.coalesce(size)
470
463
},
464
+ // -- interactive --
// first names
// Factor "personFirstNames": aggregates person ids per first name.
// `frequency` is defined outside this excerpt; presumably it groups by `by` and
// aggregates `value` into a `frequency` column — TODO confirm against its definition.
"personFirstNames" -> Factor(PersonType) { case Seq(person) =>
  frequency(person, value = $"id", by = Seq($"firstName"))
},
// friends
// Factor "personNumFriendsOfFriends": per person, the number of direct friends
// (`numFriends`) and the sum of the friends' own friend counts (`numFriendsOfFriends`).
//
// NOTE(review): the second figure adds up friend counts over all friends, so it looks
// like a count of 2-hop paths (the person itself and shared acquaintances are counted
// repeatedly) rather than of distinct persons — confirm this approximation is intended.
// NOTE(review): for a person without friends the `sum` only sees nulls from the left
// outer joins, so `numFriendsOfFriends` is presumably null rather than 0 — confirm.
"personNumFriendsOfFriends" -> Factor(PersonKnowsPersonType, PersonType) { case Seq(personKnowsPerson, person1) =>
  // direct friends: the left outer join keeps persons that have no knows edge
  // (undirectedKnows is defined elsewhere; it exposes Person1Id / Person2Id columns)
  val knows1 = person1
    .as("Person1")
    .join(undirectedKnows(personKnowsPerson).as("knows"), $"Person1.id" === $"knows.Person1Id", "leftouter")

  val personNumFriends = frequency(knows1, value = $"knows.Person2Id", by = Seq($"Person1.id"), agg = count)
    .select($"Person1.id".as("Person1Id"), $"frequency".as("numFriends"))

  // friends of friends: attach every friend's own friend count to the person
  val personFriendsOfFriends = personNumFriends.as("personNumFriends1")
    .join(undirectedKnows(personKnowsPerson).as("knows"), $"personNumFriends1.Person1Id" === $"knows.Person1Id", "leftouter")
    .join(personNumFriends.as("personNumFriends2"), $"personNumFriends2.Person1Id" === $"knows.Person2Id", "leftouter")

  // numFriends is part of the grouping key only so that it survives into the output
  frequency(
    personFriendsOfFriends,
    value = $"personNumFriends2.numFriends",
    by = Seq($"personNumFriends1.Person1Id", $"personNumFriends1.numFriends"),
    agg = sum
  ).select($"Person1Id", $"numFriends", $"frequency".as("numFriendsOfFriends"))
},
// posts
// Factor "personNumFriendOfFriendPosts": per person, the number of own posts
// (`numDirectPosts`), the summed post counts of the friends (`numFriendPosts`) and the
// summed `numFriendPosts` of the friends. The last aggregate is not renamed, so it is
// emitted under the `frequency` column produced by the `frequency` helper.
//
// NOTE(review): the 2-hop figure sums the friends' 1-hop sums, so posts reachable over
// several paths (including the person's own posts) are counted repeatedly — confirm.
// NOTE(review): persons without friends presumably get null sums, not 0 — confirm.
"personNumFriendOfFriendPosts" -> Factor(PersonType, PersonKnowsPersonType, PostType) { case Seq(person, personKnowsPerson, post) =>
  // left outer join keeps persons that never posted
  val personPosts = person
    .as("Person")
    .join(post.as("Post"), $"Post.CreatorPersonId" === $"Person.id", "leftouter")

  // direct posts
  val numPersonPosts = frequency(personPosts, value = $"Post.id", by = Seq($"Person.id"), agg = count)
    .select($"Person.id".as("Person1Id"), $"frequency".as("numDirectPosts"))

  // posts of friends
  val friendPosts = numPersonPosts.as("numPersonPosts1")
    .join(undirectedKnows(personKnowsPerson).as("knows"), $"numPersonPosts1.Person1Id" === $"knows.Person1Id", "leftouter")
    .join(numPersonPosts.as("numPersonPosts2"), $"numPersonPosts2.Person1Id" === $"knows.Person2Id", "leftouter")

  val numFriendPosts = frequency(
    friendPosts,
    value = $"numPersonPosts2.numDirectPosts",
    by = Seq($"numPersonPosts1.Person1Id", $"numPersonPosts1.numDirectPosts"),
    agg = sum
  ).select($"numPersonPosts1.Person1Id".as("Person1Id"), $"numDirectPosts", $"frequency".as("numFriendPosts"))

  // posts of friends of friends
  val friendOfFriendPosts = numFriendPosts.as("numFriendPosts1")
    .join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendPosts1.Person1Id" === $"knows.Person1Id", "leftouter")
    .join(numFriendPosts.as("numFriendPosts2"), $"numFriendPosts2.Person1Id" === $"knows.Person2Id", "leftouter")

  frequency(
    friendOfFriendPosts,
    value = $"numFriendPosts2.numFriendPosts",
    by = Seq($"numFriendPosts1.Person1Id", $"numFriendPosts1.numDirectPosts", $"numFriendPosts1.numFriendPosts"),
    agg = sum
  )
},
// comments
// Factor "personNumFriendComments": per person, the number of own comments
// (`numDirectComments`) and the summed comment counts of the friends, emitted under the
// `frequency` column produced by the `frequency` helper (no rename follows).
//
// NOTE(review): persons without friends presumably get a null sum, not 0 — confirm.
"personNumFriendComments" -> Factor(PersonType, PersonKnowsPersonType, CommentType) { case Seq(person, personKnowsPerson, comment) =>
  // direct comments: the left outer join keeps persons that never commented
  val personComments = person
    .as("Person")
    .join(comment.as("Comment"), $"Comment.CreatorPersonId" === $"Person.id", "leftouter")

  val numPersonComments = frequency(personComments, value = $"Comment.id", by = Seq($"Person.id"), agg = count)
    .select($"Person.id".as("Person1Id"), $"frequency".as("numDirectComments"))

  // friend comments: attach every friend's own comment count to the person
  val friendComments = numPersonComments.as("numPersonComments1")
    .join(undirectedKnows(personKnowsPerson).as("knows"), $"numPersonComments1.Person1Id" === $"knows.Person1Id", "leftouter")
    .join(numPersonComments.as("numPersonComments2"), $"numPersonComments2.Person1Id" === $"knows.Person2Id", "leftouter")

  frequency(
    friendComments,
    value = $"numPersonComments2.numDirectComments",
    by = Seq($"numPersonComments1.Person1Id", $"numPersonComments1.numDirectComments"),
    agg = sum
  )
},
// likes
// Factor "personLikesNumMessages": aggregates, per person, the ids of the messages
// (comments and posts) that person likes, using the default aggregate of `frequency`
// (defined outside this excerpt).
"personLikesNumMessages" -> Factor(PersonType, PersonLikesCommentType, PersonLikesPostType) { case Seq(person, personLikesComment, personLikesPost) =>
  // Bring both like relations to a common (PersonId, MessageId) shape and combine them.
  // `|+|` is a project-defined operator — presumably a union of the two frames; verify.
  val personLikesMessage =
    personLikesComment.select($"PersonId", $"CommentId".as("MessageId")) |+|
      personLikesPost.select($"PersonId", $"PostId".as("MessageId"))

  // left outer join keeps persons that like nothing
  val messages = person
    .as("Person")
    .join(personLikesMessage.as("personLikesMessage"), $"personLikesMessage.PersonId" === $"Person.id", "leftouter")

  frequency(messages, value = $"personLikesMessage.MessageId", by = Seq($"Person.id"))
},
// tags
// Factor "personNumFriendTags": per person, the number of interest tags
// (`numDirectTags`) and the summed tag counts of the friends, emitted under the
// `frequency` column produced by the `frequency` helper (no rename follows).
//
// NOTE(review): persons without friends presumably get a null sum, not 0 — confirm.
"personNumFriendTags" -> Factor(PersonType, PersonHasInterestTagType, PersonKnowsPersonType) { case Seq(person, interest, personKnowsPerson) =>
  // direct tags: the left outer join keeps persons that have no interest at all
  val personInterests = person
    .as("Person")
    .join(interest.as("interest"), $"interest.PersonId" === $"Person.id", "leftouter")

  // Group by the person side of the join. The interest-side PersonId is null for
  // persons without interests, which would merge all of them into one null-keyed row
  // and make them disappear from the joins below.
  val numPersonTags = frequency(personInterests, value = $"interest.TagId", by = Seq($"Person.id"), agg = count)
    .select($"Person.id".as("Person1Id"), $"frequency".as("numDirectTags"))

  // tags of friends: attach every friend's own tag count to the person
  val friendTags = numPersonTags.as("numPersonTags1")
    .join(undirectedKnows(personKnowsPerson).as("knows"), $"numPersonTags1.Person1Id" === $"knows.Person1Id", "leftouter")
    .join(numPersonTags.as("numPersonTags2"), $"numPersonTags2.Person1Id" === $"knows.Person2Id", "leftouter")

  frequency(
    friendTags,
    value = $"numPersonTags2.numDirectTags",
    by = Seq($"numPersonTags1.Person1Id", $"numPersonTags1.numDirectTags"),
    agg = sum
  )
},
// forums
// Factor "personNumFriendOfFriendForums": per person, the number of forum memberships
// (`numDirectForums`), the summed membership counts of the friends (`numFriendForums`)
// and the summed `numFriendForums` of the friends. The last aggregate is not renamed,
// so it is emitted under the `frequency` column produced by the `frequency` helper.
//
// NOTE(review): the 2-hop figure sums the friends' 1-hop sums, so memberships reachable
// over several paths (including the person's own) are counted repeatedly — confirm.
// NOTE(review): persons without friends presumably get null sums, not 0 — confirm.
"personNumFriendOfFriendForums" -> Factor(PersonType, ForumHasMemberType, PersonKnowsPersonType) { case Seq(person, hasMember, personKnowsPerson) =>
  // direct forums: the left outer join keeps persons that are in no forum
  val directForums = person.as("Person")
    .join(hasMember.as("hasMember"), $"hasMember.PersonId" === $"Person.id", "leftouter")

  val numForums = frequency(directForums, value = $"hasMember.ForumId", by = Seq($"Person.id"), agg = count)
    .select($"Person.id".as("Person1Id"), $"frequency".as("numDirectForums"))

  // forums of friends
  val friendForums = numForums.as("numForums1")
    .join(undirectedKnows(personKnowsPerson).as("knows"), $"numForums1.Person1Id" === $"knows.Person1Id", "leftouter")
    .join(numForums.as("numForums2"), $"numForums2.Person1Id" === $"knows.Person2Id", "leftouter")

  val numFriendForums = frequency(
    friendForums,
    value = $"numForums2.numDirectForums",
    by = Seq($"numForums1.Person1Id", $"numForums1.numDirectForums"),
    agg = sum
  ).select($"numForums1.Person1Id".as("Person1Id"), $"numForums1.numDirectForums", $"frequency".as("numFriendForums"))

  // forums of friends of friends
  val friendOfFriendForums = numFriendForums.as("numFriendForums1")
    .join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendForums1.Person1Id" === $"knows.Person1Id", "leftouter")
    .join(numFriendForums.as("numFriendForums2"), $"numFriendForums2.Person1Id" === $"knows.Person2Id", "leftouter")

  frequency(
    friendOfFriendForums,
    value = $"numFriendForums2.numFriendForums",
    by = Seq($"numFriendForums1.Person1Id", $"numFriendForums1.numDirectForums", $"numFriendForums1.numFriendForums"),
    agg = sum
  )
},
// companies
// Factor "personNumFriendOfFriendCompanies": per person, the number of work-at
// relations (`numDirectCompanies`), the summed counts of the friends
// (`numFriendCompanies`) and the summed `numFriendCompanies` of the friends. The last
// aggregate is not renamed, so it is emitted under the `frequency` column produced by
// the `frequency` helper.
//
// NOTE(review): the 2-hop figure sums the friends' 1-hop sums, so companies reachable
// over several paths (including the person's own) are counted repeatedly — confirm.
// NOTE(review): persons without friends presumably get null sums, not 0 — confirm.
"personNumFriendOfFriendCompanies" -> Factor(PersonType, PersonWorkAtCompanyType, PersonKnowsPersonType) { case Seq(person, workAt, personKnowsPerson) =>
  // direct companies: the left outer join keeps persons without any employer
  val directCompanies = person.as("Person")
    .join(workAt.as("workAt"), $"workAt.PersonId" === $"Person.id", "leftouter")

  val numCompanies = frequency(directCompanies, value = $"workAt.CompanyId", by = Seq($"Person.id"), agg = count)
    .select($"Person.id".as("Person1Id"), $"frequency".as("numDirectCompanies"))

  // companies of friends
  val friendCompanies = numCompanies.as("numCompanies1")
    .join(undirectedKnows(personKnowsPerson).as("knows"), $"numCompanies1.Person1Id" === $"knows.Person1Id", "leftouter")
    .join(numCompanies.as("numCompanies2"), $"numCompanies2.Person1Id" === $"knows.Person2Id", "leftouter")

  val numFriendCompanies = frequency(
    friendCompanies,
    value = $"numCompanies2.numDirectCompanies",
    by = Seq($"numCompanies1.Person1Id", $"numCompanies1.numDirectCompanies"),
    agg = sum
  ).select($"numCompanies1.Person1Id".as("Person1Id"), $"numCompanies1.numDirectCompanies", $"frequency".as("numFriendCompanies"))

  // companies of friends of friends
  val friendOfFriendCompanies = numFriendCompanies.as("numFriendCompanies1")
    .join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendCompanies1.Person1Id" === $"knows.Person1Id", "leftouter")
    .join(numFriendCompanies.as("numFriendCompanies2"), $"numFriendCompanies2.Person1Id" === $"knows.Person2Id", "leftouter")

  frequency(
    friendOfFriendCompanies,
    value = $"numFriendCompanies2.numFriendCompanies",
    by = Seq($"numFriendCompanies1.Person1Id", $"numFriendCompanies1.numDirectCompanies", $"numFriendCompanies1.numFriendCompanies"),
    agg = sum
  )
},
478
655
)
479
656
}
0 commit comments