@@ -473,56 +473,41 @@ object FactorGenerationStage extends DatagenStage with Logging {
473
473
.join(undirectedKnows(personKnowsPerson).as(" knows" ), $" Person1.id" === $" knows.Person1Id" , " leftouter" )
474
474
475
475
val personNumFriends = frequency(knows1, value = $" knows.Person2Id" , by = Seq ($" Person1.id" ), agg = count)
476
- .select($" Person1.id" .as(" Person1Id" ), $" frequency" )
476
+ .select($" Person1.id" .as(" Person1Id" ), $" frequency" .as( " numFriends " ) )
477
477
478
- val personFriendsOfFriends = person1
479
- .as(" Person1" )
480
- .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" Person1.id" === $" knows.Person1Id" , " leftouter" )
481
- .join(personNumFriends.as(" personNumFriends" ), $" personNumFriends.Person1Id" === $" knows.Person2Id" , " leftouter" )
478
+ val personFriendsOfFriends = personNumFriends.as(" personNumFriends1" )
479
+ .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" personNumFriends1.Person1Id" === $" knows.Person1Id" , " leftouter" )
480
+ .join(personNumFriends.as(" personNumFriends2" ), $" personNumFriends2.Person1Id" === $" knows.Person2Id" , " leftouter" )
481
+
482
+ val personNumFriendsOfFriends = frequency(personFriendsOfFriends, value = $" personNumFriends2.numFriends" , by = Seq ($" knows.Person1Id" , $" personNumFriends1.numFriends" ), agg = sum)
483
+ .select($" Person1Id" , $" numFriends" , $" frequency" .as(" numFriendsOfFriends" ))
482
484
483
- val personNumFriendsOfFriends = frequency(personFriendsOfFriends, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
484
485
personNumFriendsOfFriends
485
486
},
486
487
// posts
487
// Diff hunk, deletion side only: this change removes the standalone "personNumPosts" factor.
// The removed code left-outer-joined person to post on post.CreatorPersonId === Person.id and
// called frequency on post.id by Person.id, with no explicit agg argument.
// NOTE(review): a per-person post count is still computed as "numDirectPosts" inside
// personNumFriendOfFriendPosts — confirm that consumers of this removed factor read it from there.
- " personNumPosts" -> Factor (PersonType , PostType ) { case Seq (person, post) =>
488
- val posts = person
489
- .as(" Person" )
490
- .join(post.as(" post" ), $" post.CreatorPersonId" === $" Person.id" , " leftouter" )
491
-
492
- val numPosts = frequency(posts, value = $" post.id" , by = Seq ($" Person.id" ))
493
- numPosts
494
- },
495
// Diff hunk, deletion side only: this change removes the standalone "personNumFriendPosts" factor.
// The removed code counted posts per person (frequency with agg = count), joined those counts to the
// undirected knows edges on knows.Person2Id, and summed them by knows.Person1Id (agg = sum).
// NOTE(review): the new personNumFriendOfFriendPosts computes an equivalent-looking "numFriendPosts"
// column as an intermediate — confirm that consumers of this removed factor were migrated to it.
- " personNumFriendPosts" -> Factor (PersonType , PersonKnowsPersonType , PostType ) { case Seq (person, personKnowsPerson, post) =>
496
- val personPosts = person
497
- .as(" Person" )
498
- .join(post.as(" Post" ), $" Post.CreatorPersonId" === $" Person.id" , " leftouter" )
499
-
500
- val numPersonPosts = frequency(personPosts, value = $" Post.id" , by = Seq ($" Person.id" ), agg = count)
501
- .select($" Person.id" .as(" Person1Id" ), $" frequency" )
502
-
503
- val friendPosts = numPersonPosts.as(" numPersonPosts" )
504
- .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" numPersonPosts.Person1Id" === $" knows.Person2Id" , " leftouter" )
505
-
506
- val numFriendPosts = frequency(friendPosts, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
507
- numFriendPosts
508
- },
509
488
// Diff hunk for the "personNumFriendOfFriendPosts" factor: "-" lines are the old version, "+" lines
// the new one, unprefixed code lines are unchanged context, and bare numbers are line-number gutters.
// The new version derives three per-person values in sequence, each via a frequency(...) call:
// numDirectPosts (count of Post.id per Person.id), numFriendPosts (sum of numDirectPosts taken from
// the Person2 side of each knows edge), and a final sum of numFriendPosts over the same edge shape.
" personNumFriendOfFriendPosts" -> Factor (PersonType , PersonKnowsPersonType , PostType ) { case Seq (person, personKnowsPerson, post) =>
510
489
val personPosts = person
511
490
.as(" Person" )
512
491
.join(post.as(" Post" ), $" Post.CreatorPersonId" === $" Person.id" , " leftouter" )
513
492
493
+ // direct posts
514
494
val numPersonPosts = frequency(personPosts, value = $" Post.id" , by = Seq ($" Person.id" ), agg = count)
515
- .select($" Person.id" .as(" Person1Id" ), $" frequency" )
495
+ .select($" Person.id" .as(" Person1Id" ), $" frequency" .as( " numDirectPosts " ) )
516
496
517
- val friendPosts = numPersonPosts.as(" numPersonPosts" )
518
- .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" numPersonPosts.Person1Id" === $" knows.Person2Id" , " leftouter" )
497
+ // posts of friends
498
// New shape: numPersonPosts is joined to itself through the undirected knows edges. Alias 1 is
// matched on knows.Person1Id and alias 2 on knows.Person2Id, so one row carries both endpoints' counts.
+ val friendPosts = numPersonPosts.as(" numPersonPosts1" )
499
+ .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" numPersonPosts1.Person1Id" === $" knows.Person1Id" , " leftouter" )
500
+ .join(numPersonPosts.as(" numPersonPosts2" ), $" numPersonPosts2.Person1Id" === $" knows.Person2Id" , " leftouter" )
519
501
520
- val numFriendPosts = frequency(friendPosts, value = $" frequency" , by = Seq ($" knows.Person1Id" ), agg = sum)
502
// NOTE(review): both joins are left-outer and the grouping key is knows.Person1Id rather than
// numPersonPosts1.Person1Id. A person with no matching knows row would carry a null knows.Person1Id
// here — confirm such people are not collapsed under a shared null key (depends on frequency's grouping).
+ val numFriendPosts = frequency(friendPosts, value = $" numPersonPosts2.numDirectPosts" , by = Seq ($" knows.Person1Id" , $" numPersonPosts1.numDirectPosts" ), agg = sum)
503
+ .select($" knows.Person1Id" .as(" Person1Id" ), $" numDirectPosts" , $" frequency" .as(" numFriendPosts" ))
521
504
522
- val friendOfFriendPosts = numFriendPosts.as(" numFriendPosts" )
523
- .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" numFriendPosts.Person1Id" === $" knows.Person2Id" , " leftouter" )
505
+ // posts of friends of friends
506
+ val friendOfFriendPosts = numFriendPosts.as(" numFriendPosts1" )
507
+ .join(undirectedKnows(personKnowsPerson).as(" knows" ), $" numFriendPosts1.Person1Id" === $" knows.Person1Id" , " leftouter" )
508
+ .join(numFriendPosts.as(" numFriendPosts2" ), $" numFriendPosts2.Person1Id" === $" knows.Person2Id" , " leftouter" )
524
509
525
- val numFriendOfFriendPosts = frequency(friendOfFriendPosts, value = $" frequency " , by = Seq ($" knows.Person1Id" ), agg = sum)
510
// NOTE(review): unlike numFriendPosts, this result has no trailing .select, so the factor's output
// columns are whatever frequency(...) yields — presumably the three `by` columns plus "frequency";
// verify against frequency's definition. The same null-key question on knows.Person1Id applies here.
+ val numFriendOfFriendPosts = frequency(friendOfFriendPosts, value = $" numFriendPosts2.numFriendPosts " , by = Seq ($" knows.Person1Id" , $ " numFriendPosts1.numDirectPosts " , $ " numFriendPosts1.numFriendPosts " ), agg = sum)
526
511
numFriendOfFriendPosts
527
512
},
528
513
// comments
0 commit comments