Skip to content

Commit 5eff344

Browse files
committed
FactorGen: Optimize generation of personNumFriendOfFriendPosts, personNumFriendOfFriendForums, personNumFriendOfFriendCompanies
1 parent 1c77843 commit 5eff344

File tree

1 file changed

61 additions and 22 deletions

1 file changed

+61
-22
lines changed

src/main/scala/ldbc/snb/datagen/factors/FactorGenerationStage.scala

Lines changed: 61 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -508,8 +508,27 @@ object FactorGenerationStage extends DatagenStage with Logging {
508508
.as("Person")
509509
.join(undirectedKnows(personKnowsPerson).as("knows1"), $"Person.id" === $"knows1.Person1Id", "leftouter")
510510
.join(undirectedKnows(personKnowsPerson).as("knows2"), $"knows1.Person2Id" === $"knows2.Person1Id", "leftouter")
511-
.join(post.as("post"), $"post.CreatorPersonId" === $"knows1.Person2Id", "leftouter")
511+
.join(post.as("post"), $"post.CreatorPersonId" === $"knows2.Person2Id", "leftouter")
512512
frequency(posts, value = $"post.id", by = Seq($"Person.id"))
513+
514+
val personPosts = person
515+
.as("Person")
516+
.join(post.as("Post"), $"Post.CreatorPersonId" === $"Person.id", "leftouter")
517+
518+
val numPersonPosts = frequency(personPosts, value = $"Post.id", by = Seq($"Person.id"), agg = count)
519+
.select($"Person.id".as("Person1Id"), $"frequency")
520+
521+
val friendPosts = numPersonPosts.as("numPersonPosts")
522+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numPersonPosts.Person1Id" === $"knows.Person2Id", "leftouter")
523+
524+
val numFriendPosts = frequency(friendPosts, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
525+
526+
val friendOfFriendPosts = numFriendPosts.as("numFriendPosts")
527+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendPosts.Person1Id" === $"knows.Person2Id", "leftouter")
528+
529+
val numFriendOfFriendPosts = frequency(friendOfFriendPosts, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
530+
531+
numFriendOfFriendPosts
513532
},
514533
// comments
515534
"personNumComments" -> Factor(PersonType, PostType) { case Seq(person, comment) =>
@@ -556,40 +575,60 @@ object FactorGenerationStage extends DatagenStage with Logging {
556575
"personNumFriendForums" -> Factor(ForumHasMemberType, PersonKnowsPersonType) { case Seq(hasMember, personKnowsPerson) =>
557576
val personNumForums = frequency(hasMember, value = $"ForumId", by = Seq($"PersonId"), agg = count)
558577
.select($"PersonId".as("Person1Id"), $"frequency")
578+
559579
val friendForums = personNumForums.as("personNumForums")
560580
.join(undirectedKnows(personKnowsPerson).as("knows"), $"personNumForums.Person1Id" === $"knows.Person2Id", "leftouter")
561-
frequency(friendForums, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
562581

582+
val numFriendForums = frequency(friendForums, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
583+
584+
numFriendForums
563585
},
564586
"personNumFriendOfFriendForums" -> Factor(ForumHasMemberType, PersonKnowsPersonType) { case Seq(hasMember, personKnowsPerson) =>
565-
frequency(
566-
undirectedKnows(personKnowsPerson).as("knows1")
567-
.join(undirectedKnows(personKnowsPerson).as("knows2"), $"knows1.Person2Id" === $"knows2.Person1Id", "leftouter")
568-
.join(hasMember, $"PersonId" === $"knows2.Person2Id", "leftouter"),
569-
value = $"ForumId",
570-
by = Seq($"knows1.Person1Id")
571-
)
587+
val personNumForums = frequency(hasMember, value = $"ForumId", by = Seq($"PersonId"), agg = count)
588+
.select($"PersonId".as("Person1Id"), $"frequency")
589+
590+
val friendForums = personNumForums.as("personNumForums")
591+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"personNumForums.Person1Id" === $"knows.Person2Id", "leftouter")
592+
593+
val numFriendForums = frequency(friendForums, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
594+
595+
val friendOfFriendForums = numFriendForums.as("numFriendForums")
596+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendForums.Person1Id" === $"knows.Person2Id", "leftouter")
597+
598+
val numFriendOfFriendForums = frequency(friendOfFriendForums, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
599+
600+
numFriendOfFriendForums
572601
},
573602
// companies
574603
"personNumCompanies" -> Factor(PersonWorkAtCompanyType) { case Seq(workAt) =>
575604
frequency(workAt, value = $"CompanyId", by = Seq($"PersonId"))
576605
},
577606
"personNumFriendCompanies" -> Factor(PersonWorkAtCompanyType, PersonKnowsPersonType) { case Seq(workAt, personKnowsPerson) =>
578-
frequency(
579-
undirectedKnows(personKnowsPerson).as("knows")
580-
.join(workAt, $"PersonId" === $"knows.Person2Id", "leftouter"),
581-
value = $"CompanyId",
582-
by = Seq($"knows.Person1Id")
583-
)
607+
val personNumCompanies = frequency(workAt, value = $"CompanyId", by = Seq($"PersonId"), agg = count)
608+
.select($"PersonId".as("Person1Id"), $"frequency")
609+
610+
val friendCompanies = personNumCompanies.as("personNumCompanies")
611+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"personNumCompanies.Person1Id" === $"knows.Person2Id", "leftouter")
612+
613+
val numFriendCompanies = frequency(friendCompanies, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
614+
615+
numFriendCompanies
584616
},
585617
"personNumFriendOfFriendCompanies" -> Factor(PersonWorkAtCompanyType, PersonKnowsPersonType) { case Seq(workAt, personKnowsPerson) =>
586-
frequency(
587-
undirectedKnows(personKnowsPerson).as("knows1")
588-
.join(undirectedKnows(personKnowsPerson).as("knows2"), $"knows1.Person2Id" === $"knows2.Person1Id", "leftouter")
589-
.join(workAt, $"PersonId" === $"knows2.Person2Id", "leftouter"),
590-
value = $"CompanyId",
591-
by = Seq($"knows1.Person1Id")
592-
)
618+
val personNumCompanies = frequency(workAt, value = $"CompanyId", by = Seq($"PersonId"), agg = count)
619+
.select($"PersonId".as("Person1Id"), $"frequency")
620+
621+
val friendCompanies = personNumCompanies.as("personNumCompanies")
622+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"personNumCompanies.Person1Id" === $"knows.Person2Id", "leftouter")
623+
624+
val numFriendCompanies = frequency(friendCompanies, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
625+
626+
val friendOfFriendCompanies = numFriendCompanies.as("numFriendCompanies")
627+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendCompanies.Person1Id" === $"knows.Person2Id", "leftouter")
628+
629+
val numFriendOfFriendCompanies = frequency(friendOfFriendCompanies, value = $"frequency", by = Seq($"knows.Person1Id"), agg = sum)
630+
631+
numFriendOfFriendCompanies
593632
},
594633
)
595634
}

0 commit comments

Comments
 (0)