Skip to content

Commit ffd25d7

Browse files
committed
FactorGen: Implement personNumFriendOfFriendCompanies, reformat a few other factor definitions
1 parent 95caf28 commit ffd25d7

File tree

1 file changed

+50
-32
lines changed

1 file changed

+50
-32
lines changed

src/main/scala/ldbc/snb/datagen/factors/FactorGenerationStage.scala

Lines changed: 50 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -477,10 +477,14 @@ object FactorGenerationStage extends DatagenStage with Logging {
477477

478478
val personFriendsOfFriends = personNumFriends.as("personNumFriends1")
479479
.join(undirectedKnows(personKnowsPerson).as("knows"), $"personNumFriends1.Person1Id" === $"knows.Person1Id", "leftouter")
480-
.join(personNumFriends.as("personNumFriends2"), $"personNumFriends2.Person1Id" === $"knows.Person2Id", "leftouter")
480+
.join(personNumFriends.as("personNumFriends2"), $"personNumFriends2.Person1Id" === $"knows.Person2Id", "leftouter")
481481

482-
val personNumFriendsOfFriends = frequency(personFriendsOfFriends, value = $"personNumFriends2.numFriends", by = Seq($"knows.Person1Id", $"personNumFriends1.numFriends"), agg = sum)
483-
.select($"Person1Id", $"numFriends", $"frequency".as("numFriendsOfFriends"))
482+
val personNumFriendsOfFriends = frequency(
483+
personFriendsOfFriends,
484+
value = $"personNumFriends2.numFriends",
485+
by = Seq($"knows.Person1Id", $"personNumFriends1.numFriends"),
486+
agg = sum
487+
).select($"Person1Id", $"numFriends", $"frequency".as("numFriendsOfFriends"))
484488

485489
personNumFriendsOfFriends
486490
},
@@ -497,17 +501,26 @@ object FactorGenerationStage extends DatagenStage with Logging {
497501
// posts of friends
498502
val friendPosts = numPersonPosts.as("numPersonPosts1")
499503
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numPersonPosts1.Person1Id" === $"knows.Person1Id", "leftouter")
500-
.join(numPersonPosts.as("numPersonPosts2"), $"numPersonPosts2.Person1Id" === $"knows.Person2Id", "leftouter")
504+
.join(numPersonPosts.as("numPersonPosts2"), $"numPersonPosts2.Person1Id" === $"knows.Person2Id", "leftouter")
501505

502-
val numFriendPosts = frequency(friendPosts, value = $"numPersonPosts2.numDirectPosts", by = Seq($"knows.Person1Id", $"numPersonPosts1.numDirectPosts"), agg = sum)
503-
.select($"knows.Person1Id".as("Person1Id"), $"numDirectPosts", $"frequency".as("numFriendPosts"))
506+
val numFriendPosts = frequency(
507+
friendPosts,
508+
value = $"numPersonPosts2.numDirectPosts",
509+
by = Seq($"knows.Person1Id", $"numPersonPosts1.numDirectPosts"),
510+
agg = sum
511+
).select($"knows.Person1Id".as("Person1Id"), $"numDirectPosts", $"frequency".as("numFriendPosts"))
504512

505513
// posts of friends of friends
506514
val friendOfFriendPosts = numFriendPosts.as("numFriendPosts1")
507515
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendPosts1.Person1Id" === $"knows.Person1Id", "leftouter")
508-
.join(numFriendPosts.as("numFriendPosts2"), $"numFriendPosts2.Person1Id" === $"knows.Person2Id", "leftouter")
516+
.join(numFriendPosts.as("numFriendPosts2"), $"numFriendPosts2.Person1Id" === $"knows.Person2Id", "leftouter")
509517

510-
val numFriendOfFriendPosts = frequency(friendOfFriendPosts, value = $"numFriendPosts2.numFriendPosts", by = Seq($"knows.Person1Id", $"numFriendPosts1.numDirectPosts", $"numFriendPosts1.numFriendPosts"), agg = sum)
518+
val numFriendOfFriendPosts = frequency(
519+
friendOfFriendPosts,
520+
value = $"numFriendPosts2.numFriendPosts",
521+
by = Seq($"numFriendPosts1.Person1Id", $"numFriendPosts1.numDirectPosts", $"numFriendPosts1.numFriendPosts"),
522+
agg = sum
523+
)
511524
numFriendOfFriendPosts
512525
},
513526
// comments
@@ -590,32 +603,37 @@ object FactorGenerationStage extends DatagenStage with Logging {
590603
numFriendOfFriendForums
591604
},
592605
// companies
593-
"personNumCompanies" -> Factor(PersonWorkAtCompanyType) { case Seq(workAt) =>
594-
frequency(workAt, value = $"CompanyId", by = Seq($"PersonId"))
595-
},
596-
"personNumFriendCompanies" -> Factor(PersonWorkAtCompanyType, PersonKnowsPersonType) { case Seq(workAt, personKnowsPerson) =>
597-
val personNumCompanies = frequency(workAt, value = $"CompanyId", by = Seq($"PersonId"), agg = count)
598-
.select($"PersonId".as("Person1Id"), $"frequency")
599-
600-
val friendCompanies = personNumCompanies.as("personNumCompanies")
601-
.join(undirectedKnows(personKnowsPerson).as("knows"), $"personNumCompanies.Person1Id" === $"knows.Person2Id", "leftouter")
602-
603-
val numFriendCompanies = frequency(friendCompanies, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
604-
numFriendCompanies
605-
},
606-
"personNumFriendOfFriendCompanies" -> Factor(PersonWorkAtCompanyType, PersonKnowsPersonType) { case Seq(workAt, personKnowsPerson) =>
607-
val personNumCompanies = frequency(workAt, value = $"CompanyId", by = Seq($"PersonId"), agg = count)
608-
.select($"PersonId".as("Person1Id"), $"frequency")
609-
610-
val friendCompanies = personNumCompanies.as("personNumCompanies")
611-
.join(undirectedKnows(personKnowsPerson).as("knows"), $"personNumCompanies.Person1Id" === $"knows.Person2Id", "leftouter")
612-
613-
val numFriendCompanies = frequency(friendCompanies, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
606+
"personNumFriendOfFriendCompanies" -> Factor(PersonType, PersonWorkAtCompanyType, PersonKnowsPersonType) { case Seq(person, workAt, personKnowsPerson) =>
607+
// direct companies
608+
val directCompanies = person.as("Person")
609+
.join(workAt.as("workAt"), $"workAt.PersonId" === $"Person.id", "leftouter")
610+
611+
val numCompanies = frequency(directCompanies, value = $"CompanyId", by = Seq($"Person.id"), agg = count)
612+
.select($"Person.id".as("Person1Id"), $"frequency".as("numDirectCompanies"))
613+
614+
val friendCompanies = numCompanies.as("numCompanies1")
615+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numCompanies1.Person1Id" === $"knows.Person1Id", "leftouter")
616+
.join(numCompanies.as("numCompanies2"), $"numCompanies2.Person1Id" === $"knows.Person2Id", "leftouter")
617+
618+
// companies of friends
619+
val numFriendCompanies = frequency(
620+
friendCompanies,
621+
value = $"numCompanies2.numDirectCompanies",
622+
by = Seq($"numCompanies1.Person1Id", $"numCompanies1.numDirectCompanies"),
623+
agg = sum
624+
).select($"numCompanies1.Person1Id".as("Person1Id"), $"numCompanies1.numDirectCompanies", $"frequency".as("numFriendCompanies"))
614625

615-
val friendOfFriendCompanies = numFriendCompanies.as("numFriendCompanies")
616-
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendCompanies.Person1Id" === $"knows.Person2Id", "leftouter")
626+
// companies of friends of friends
627+
val friendOfFriendCompanies = numFriendCompanies.as("numFriendCompanies1")
628+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendCompanies1.Person1Id" === $"knows.Person1Id", "leftouter")
629+
.join(numFriendCompanies.as("numFriendCompanies2"), $"numFriendCompanies2.Person1Id" === $"knows.Person2Id", "leftouter")
617630

618-
val numFriendOfFriendCompanies = frequency(friendOfFriendCompanies, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
631+
val numFriendOfFriendCompanies = frequency(
632+
friendOfFriendCompanies,
633+
value = $"numFriendCompanies2.numFriendCompanies",
634+
by = Seq($"numFriendCompanies1.Person1Id", $"numFriendCompanies1.numDirectCompanies", $"numFriendCompanies1.numFriendCompanies"),
635+
agg = sum
636+
)
619637
numFriendOfFriendCompanies
620638
},
621639
)

0 commit comments

Comments
 (0)