@@ -508,8 +508,27 @@ object FactorGenerationStage extends DatagenStage with Logging {
508
508
.as(" Person" )
509
509
.join(undirectedKnows(personKnowsPerson).as(" knows1" ), $" Person.id" === $" knows1.Person1Id" , " leftouter" )
510
510
.join(undirectedKnows(personKnowsPerson).as(" knows2" ), $" knows1.Person2Id" === $" knows2.Person1Id" , " leftouter" )
511
- .join(post.as(" post" ), $" post.CreatorPersonId" === $" knows1 .Person2Id" , " leftouter" )
511
+ .join(post.as(" post" ), $" post.CreatorPersonId" === $" knows2 .Person2Id" , " leftouter" )
512
512
frequency(posts, value = $" post.id" , by = Seq ($" Person.id" ))
513
+
514
+ val personPosts = person
515
+ .as(" Person" )
516
+ .join(post.as(" Post" ), $" Post.CreatorPersonId" === $" Person.id" , " leftouter" )
517
+
518
+ val numPersonPosts = frequency(personPosts, value = $" Post.id" , by = Seq ($" Person.id" ), agg = count)
519
+ .select($" Person.id" .as(" Person1Id" ), $" frequency" )
520
+
521
+ val friendPosts = numPersonPosts.as(" numPersonPosts" )
522
+ .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" numPersonPosts.Person1Id" === $" knows.Person2Id" , " leftouter" )
523
+
524
+ val numFriendPosts = frequency(friendPosts, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
525
+
526
+ val friendOfFriendPosts = numFriendPosts.as(" numFriendPosts" )
527
+ .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" numFriendPosts.Person1Id" === $" knows.Person2Id" , " leftouter" )
528
+
529
+ val numFriendOfFriendPosts = frequency(friendOfFriendPosts, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
530
+
531
+ numFriendOfFriendPosts
513
532
},
514
533
// comments
515
534
" personNumComments" -> Factor (PersonType , PostType ) { case Seq (person, comment) =>
@@ -556,40 +575,60 @@ object FactorGenerationStage extends DatagenStage with Logging {
// Factor "personNumFriendForums", built from the forum-membership and knows inputs.
// Steps visible below:
//   1. frequency(...) with agg = count over ForumId grouped by PersonId, with PersonId renamed to Person1Id;
//   2. left outer join of those per-person rows to undirectedKnows(personKnowsPerson) on knows.Person2Id;
//   3. frequency(...) with agg = sum over the joined "frequency" column, grouped by knows.Person1Id.
// The '-' / '+' rows only bind the step-3 result to a named val (numFriendForums) and return it;
// the frequency(...) call itself is the same in both versions.
// NOTE(review): step 3 adds up per-friend counts, so a forum that several friends belong to is
//   presumably counted once per friend rather than once overall; confirm this is intended.
// NOTE(review): the per-person counts are on the left side of the left outer join, so a row with no
//   matching knows edge would reach the grouping with a null knows.Person1Id; confirm how
//   frequency / downstream consumers treat that key.
556
575
" personNumFriendForums" -> Factor (ForumHasMemberType , PersonKnowsPersonType ) { case Seq (hasMember, personKnowsPerson) =>
557
576
val personNumForums = frequency(hasMember, value = $" ForumId" , by = Seq ($" PersonId" ), agg = count)
558
577
.select($" PersonId" .as(" Person1Id" ), $" frequency" )
578
+
559
579
val friendForums = personNumForums.as(" personNumForums" )
560
580
.join(undirectedKnows(personKnowsPerson).as(" knows" ), $" personNumForums.Person1Id" === $" knows.Person2Id" , " leftouter" )
561
- frequency(friendForums, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
562
581
582
+ val numFriendForums = frequency(friendForums, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
583
+
584
+ numFriendForums
563
585
},
// Factor "personNumFriendOfFriendForums".
// Removed version ('-' rows): one frequency(...) call over knows1 left-outer-joined to knows2
//   (knows1.Person2Id === knows2.Person1Id) and then to hasMember (PersonId === knows2.Person2Id),
//   with value = ForumId and by = knows1.Person1Id. No agg argument is passed, so frequency's
//   default aggregation applies (its definition is not visible here).
// Added version ('+' rows): a two-stage roll-up.
//   1. per-person ForumId counts (agg = count), PersonId renamed to Person1Id;
//   2. joined to the knows edges and summed by knows.Person1Id -> numFriendForums;
//   3. numFriendForums joined to the knows edges again and summed by knows.Person1Id.
// NOTE(review): the added version sums already-aggregated counts, while the removed version
//   aggregated ForumId values directly on the two-hop join. The two are presumably not numerically
//   equivalent (shared forums and multiple paths get added repeatedly); confirm the change in
//   meaning is intended.
// NOTE(review): step 3 joins on numFriendForums.Person1Id, which assumes frequency(...) exposes its
//   `by` column under the name Person1Id; TODO confirm against frequency's definition.
// NOTE(review): steps 1-2 repeat the body of the "personNumFriendForums" entry verbatim.
564
586
" personNumFriendOfFriendForums" -> Factor (ForumHasMemberType , PersonKnowsPersonType ) { case Seq (hasMember, personKnowsPerson) =>
565
- frequency(
566
- undirectedKnows(personKnowsPerson).as(" knows1" )
567
- .join(undirectedKnows(personKnowsPerson).as(" knows2" ), $" knows1.Person2Id" === $" knows2.Person1Id" , " leftouter" )
568
- .join(hasMember, $" PersonId" === $" knows2.Person2Id" , " leftouter" ),
569
- value = $" ForumId" ,
570
- by = Seq ($" knows1.Person1Id" )
571
- )
587
+ val personNumForums = frequency(hasMember, value = $" ForumId" , by = Seq ($" PersonId" ), agg = count)
588
+ .select($" PersonId" .as(" Person1Id" ), $" frequency" )
589
+
590
+ val friendForums = personNumForums.as(" personNumForums" )
591
+ .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" personNumForums.Person1Id" === $" knows.Person2Id" , " leftouter" )
592
+
593
+ val numFriendForums = frequency(friendForums, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
594
+
595
+ val friendOfFriendForums = numFriendForums.as(" numFriendForums" )
596
+ .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" numFriendForums.Person1Id" === $" knows.Person2Id" , " leftouter" )
597
+
598
+ val numFriendOfFriendForums = frequency(friendOfFriendForums, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
599
+
600
+ numFriendOfFriendForums
572
601
},
573
602
// companies
// Factor "personNumCompanies": a single frequency(...) call over the work-at input with
// value = CompanyId grouped by PersonId. No agg argument is passed, so frequency's default
// aggregation applies (its definition is not visible here). This entry is unchanged context.
574
603
" personNumCompanies" -> Factor (PersonWorkAtCompanyType ) { case Seq (workAt) =>
575
604
frequency(workAt, value = $" CompanyId" , by = Seq ($" PersonId" ))
576
605
},
// Factor "personNumFriendCompanies".
// Removed version ('-' rows): frequency(...) over the knows edges left-outer-joined to workAt
//   (PersonId === knows.Person2Id), with value = CompanyId and by = knows.Person1Id, using
//   frequency's default aggregation (no agg argument passed).
// Added version ('+' rows):
//   1. per-person CompanyId counts (agg = count), PersonId renamed to Person1Id;
//   2. left outer join of those counts to the knows edges on knows.Person2Id;
//   3. sum of the joined "frequency" column grouped by knows.Person1Id.
// NOTE(review): the join direction flips. The removed version had the knows edges on the left of
//   the left outer join; the added version has the per-person counts on the left. Which unmatched
//   rows survive therefore differs (friends without workAt rows vs. workers without knows edges);
//   confirm the resulting factor still covers the intended set of persons.
// NOTE(review): summing per-friend counts presumably counts a company once per friend employed
//   there; confirm this is the intended meaning.
577
606
" personNumFriendCompanies" -> Factor (PersonWorkAtCompanyType , PersonKnowsPersonType ) { case Seq (workAt, personKnowsPerson) =>
578
- frequency(
579
- undirectedKnows(personKnowsPerson).as(" knows" )
580
- .join(workAt, $" PersonId" === $" knows.Person2Id" , " leftouter" ),
581
- value = $" CompanyId" ,
582
- by = Seq ($" knows.Person1Id" )
583
- )
607
+ val personNumCompanies = frequency(workAt, value = $" CompanyId" , by = Seq ($" PersonId" ), agg = count)
608
+ .select($" PersonId" .as(" Person1Id" ), $" frequency" )
609
+
610
+ val friendCompanies = personNumCompanies.as(" personNumCompanies" )
611
+ .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" personNumCompanies.Person1Id" === $" knows.Person2Id" , " leftouter" )
612
+
613
+ val numFriendCompanies = frequency(friendCompanies, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
614
+
615
+ numFriendCompanies
584
616
},
// Factor "personNumFriendOfFriendCompanies".
// Removed version ('-' rows): one frequency(...) call over knows1 left-outer-joined to knows2
//   (knows1.Person2Id === knows2.Person1Id) and then to workAt (PersonId === knows2.Person2Id),
//   with value = CompanyId and by = knows1.Person1Id, using frequency's default aggregation.
// Added version ('+' rows): a two-stage roll-up.
//   1. per-person CompanyId counts (agg = count), PersonId renamed to Person1Id;
//   2. joined to the knows edges and summed by knows.Person1Id -> numFriendCompanies;
//   3. numFriendCompanies joined to the knows edges again and summed by knows.Person1Id.
// NOTE(review): as with the forum variant, the added version sums already-aggregated counts
//   instead of aggregating CompanyId over the two-hop join, so the numbers presumably differ from
//   the removed version; confirm the change in meaning is intended.
// NOTE(review): step 3 joins on numFriendCompanies.Person1Id, which assumes frequency(...) exposes
//   its `by` column under the name Person1Id; TODO confirm against frequency's definition.
// NOTE(review): steps 1-2 repeat the added body of the "personNumFriendCompanies" entry verbatim.
585
617
" personNumFriendOfFriendCompanies" -> Factor (PersonWorkAtCompanyType , PersonKnowsPersonType ) { case Seq (workAt, personKnowsPerson) =>
586
- frequency(
587
- undirectedKnows(personKnowsPerson).as(" knows1" )
588
- .join(undirectedKnows(personKnowsPerson).as(" knows2" ), $" knows1.Person2Id" === $" knows2.Person1Id" , " leftouter" )
589
- .join(workAt, $" PersonId" === $" knows2.Person2Id" , " leftouter" ),
590
- value = $" CompanyId" ,
591
- by = Seq ($" knows1.Person1Id" )
592
- )
618
+ val personNumCompanies = frequency(workAt, value = $" CompanyId" , by = Seq ($" PersonId" ), agg = count)
619
+ .select($" PersonId" .as(" Person1Id" ), $" frequency" )
620
+
621
+ val friendCompanies = personNumCompanies.as(" personNumCompanies" )
622
+ .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" personNumCompanies.Person1Id" === $" knows.Person2Id" , " leftouter" )
623
+
624
+ val numFriendCompanies = frequency(friendCompanies, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
625
+
626
+ val friendOfFriendCompanies = numFriendCompanies.as(" numFriendCompanies" )
627
+ .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" numFriendCompanies.Person1Id" === $" knows.Person2Id" , " leftouter" )
628
+
629
+ val numFriendOfFriendCompanies = frequency(friendOfFriendCompanies, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
630
+
631
+ numFriendOfFriendCompanies
593
632
},
594
633
)
595
634
}
0 commit comments