Skip to content

Commit 44cf9c2

Browse files
authored
Merge pull request #400 from ldbc/interactive-more-factor-tables
Add more Interactive factor tables
2 parents d060baa + 6a3fdfd commit 44cf9c2

File tree

1 file changed

+189
-12
lines changed

1 file changed

+189
-12
lines changed

src/main/scala/ldbc/snb/datagen/factors/FactorGenerationStage.scala

Lines changed: 189 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -303,13 +303,6 @@ object FactorGenerationStage extends DatagenStage with Logging {
303303
.join(undirectedKnows(personKnowsPerson).as("knows"), $"Person1.id" === $"knows.Person1Id", "leftouter")
304304
frequency(knows, value = $"knows.Person2Id", by = Seq($"Person1.id", $"Person1.creationDate", $"Person1.deletionDate"))
305305
},
// Old definition of this factor, shown as removed lines in this hunk.
// Left-outer-joins each person to their `knows` edges and then to the neighbours' `knows` edges,
// and hands `knows2.Person2Id` to `frequency`, keyed by the person's id and creation/deletion dates.
// NOTE(review): `frequency` and `undirectedKnows` are defined outside this view; the aggregator
// used when `agg` is omitted is presumably a count — confirm.
306-
"personNumFriendsOfFriends" -> Factor(PersonKnowsPersonType, PersonType) { case Seq(personKnowsPerson, person1) =>
307-
val foaf = person1
308-
.as("Person1")
309-
.join(undirectedKnows(personKnowsPerson).as("knows1"), $"Person1.id" === $"knows1.Person1Id", "leftouter")
310-
.join(undirectedKnows(personKnowsPerson).as("knows2"), $"knows1.Person2Id" === $"knows2.Person1Id", "leftouter")
311-
frequency(foaf, value = $"knows2.Person2Id", by = Seq($"Person1.id", $"Person1.creationDate", $"Person1.deletionDate"))
312-
},
// Per-language post factor: posts whose `language` is null are filtered out, then the post `id`
// is passed to `frequency`, keyed by `language`.
313306
"languageNumPosts" -> Factor(PostType) { case Seq(post) =>
314307
frequency(post.where($"language".isNotNull), value = $"id", by = Seq($"language"))
315308
},
@@ -468,12 +461,196 @@ object FactorGenerationStage extends DatagenStage with Logging {
468461
.where($"creationDate" < $"deletionDate")
469462
.coalesce(size)
470463
},
464+
// -- interactive --
465+
// first names
// Passes the person `id` to `frequency`, keyed by `firstName`.
// The hunk below shows the former multi-line call (removed lines) followed by its one-line
// replacement (added lines); the arguments are the same in both.
471466
"personFirstNames" -> Factor(PersonType) { case Seq(person) =>
472-
frequency(
473-
person,
474-
value = $"id",
475-
by = Seq($"firstName")
467+
frequency(person, value = $"id", by = Seq($"firstName"))
468+
},
469+
// friends
// Per person: the number of direct friends (`numFriends`) and the sum of the friends' own
// `numFriends` values (`numFriendsOfFriends`).
// NOTE(review): summing the neighbours' friend counts counts 2-hop paths, so the person
// themselves and anyone reachable through several friends are presumably counted more than
// once — confirm that an over-estimate is acceptable for this factor.
// NOTE(review): both joins are left outer; for a person without friends the summed value is
// presumably null rather than 0 (depends on how `sum` treats an all-null group) — confirm.
470+
"personNumFriendsOfFriends" -> Factor(PersonKnowsPersonType, PersonType) { case Seq(personKnowsPerson, person1) =>
471+
// direct friends
472+
val knows1 = person1
473+
.as("Person1")
474+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"Person1.id" === $"knows.Person1Id", "leftouter")
475+
476+
// `frequency` yields a column named `frequency`, renamed here to `numFriends`.
val personNumFriends = frequency(knows1, value = $"knows.Person2Id", by = Seq($"Person1.id"), agg = count)
477+
.select($"Person1.id".as("Person1Id"), $"frequency".as("numFriends"))
478+
479+
// friends of friends
480+
// The per-person counts are joined to themselves through the `knows` edges:
// `personNumFriends1` is the person, `personNumFriends2` carries each friend's count.
val personFriendsOfFriends = personNumFriends.as("personNumFriends1")
481+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"personNumFriends1.Person1Id" === $"knows.Person1Id", "leftouter")
482+
.join(personNumFriends.as("personNumFriends2"), $"personNumFriends2.Person1Id" === $"knows.Person2Id", "leftouter")
483+
484+
val personNumFriendsOfFriends = frequency(
485+
personFriendsOfFriends,
486+
value = $"personNumFriends2.numFriends",
487+
by = Seq($"personNumFriends1.Person1Id", $"personNumFriends1.numFriends"),
488+
agg = sum
489+
).select($"Person1Id", $"numFriends", $"frequency".as("numFriendsOfFriends"))
490+
491+
personNumFriendsOfFriends
492+
},
493+
// posts
// Per person, in three steps: own post count (`numDirectPosts`), the sum of the friends'
// post counts (`numFriendPosts`), and finally the sum of the friends' `numFriendPosts` values.
// Each step joins the previous step's result to itself through the `knows` edges.
// NOTE(review): the last aggregate is not renamed by a trailing `.select`; it is returned under
// whatever column name `frequency` produces (the earlier steps read that column as `frequency`).
// NOTE(review): all joins are left outer, so the summed columns are presumably null (not 0)
// for persons without friends — confirm against `frequency`/`sum`.
494+
"personNumFriendOfFriendPosts" -> Factor(PersonType, PersonKnowsPersonType, PostType) { case Seq(person, personKnowsPerson, post) =>
495+
val personPosts = person
496+
.as("Person")
497+
.join(post.as("Post"), $"Post.CreatorPersonId" === $"Person.id", "leftouter")
498+
499+
// direct posts
500+
val numPersonPosts = frequency(personPosts, value = $"Post.id", by = Seq($"Person.id"), agg = count)
501+
.select($"Person.id".as("Person1Id"), $"frequency".as("numDirectPosts"))
502+
503+
// posts of friends
504+
val friendPosts = numPersonPosts.as("numPersonPosts1")
505+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numPersonPosts1.Person1Id" === $"knows.Person1Id", "leftouter")
506+
.join(numPersonPosts.as("numPersonPosts2"), $"numPersonPosts2.Person1Id" === $"knows.Person2Id", "leftouter")
507+
508+
val numFriendPosts = frequency(
509+
friendPosts,
510+
value = $"numPersonPosts2.numDirectPosts",
511+
by = Seq($"numPersonPosts1.Person1Id", $"numPersonPosts1.numDirectPosts"),
512+
agg = sum
513+
).select($"numPersonPosts1.Person1Id".as("Person1Id"), $"numDirectPosts", $"frequency".as("numFriendPosts"))
514+
515+
// posts of friends of friends
516+
val friendOfFriendPosts = numFriendPosts.as("numFriendPosts1")
517+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendPosts1.Person1Id" === $"knows.Person1Id", "leftouter")
518+
.join(numFriendPosts.as("numFriendPosts2"), $"numFriendPosts2.Person1Id" === $"knows.Person2Id", "leftouter")
519+
520+
val numFriendOfFriendPosts = frequency(
521+
friendOfFriendPosts,
522+
value = $"numFriendPosts2.numFriendPosts",
523+
by = Seq($"numFriendPosts1.Person1Id", $"numFriendPosts1.numDirectPosts", $"numFriendPosts1.numFriendPosts"),
524+
agg = sum
476525
)
477-
}
526+
numFriendOfFriendPosts
527+
},
528+
// comments
// Per person: own comment count (`numDirectComments`) and the sum of the friends' comment
// counts, obtained by joining the per-person counts to themselves through the `knows` edges.
// NOTE(review): the final aggregate is returned without a renaming `.select`, i.e. under the
// column name `frequency` produces (read as `frequency` in the first step).
529+
"personNumFriendComments" -> Factor(PersonType, PersonKnowsPersonType, CommentType) { case Seq(person, personKnowsPerson, comment) =>
530+
// direct comments
531+
val personComments = person
532+
.as("Person")
533+
.join(comment.as("Comment"), $"Comment.CreatorPersonId" === $"Person.id", "leftouter")
534+
535+
val numPersonComments = frequency(personComments, value = $"Comment.id", by = Seq($"Person.id"), agg = count)
536+
.select($"Person.id".as("Person1Id"), $"frequency".as("numDirectComments"))
537+
538+
// friend comments
539+
val friendComments = numPersonComments.as("numPersonComments1")
540+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numPersonComments1.Person1Id" === $"knows.Person1Id", "leftouter")
541+
.join(numPersonComments.as("numPersonComments2"), $"numPersonComments2.Person1Id" === $"knows.Person2Id", "leftouter")
542+
543+
val numFriendComments = frequency(
544+
friendComments,
545+
value = $"numPersonComments2.numDirectComments",
546+
by = Seq($"numPersonComments1.Person1Id", $"numPersonComments1.numDirectComments"),
547+
agg = sum
548+
)
549+
numFriendComments
550+
},
551+
// likes
// Per person: `frequency` over the messages (comments and posts) the person likes.
// Comment likes and post likes are first projected to a common (`PersonId`, `MessageId`) shape
// and combined with `|+|` (operator defined outside this view; presumably a union — confirm).
// Persons are left-outer-joined to the combined likes, so persons with no likes stay in the input.
// NOTE(review): no `agg` is passed here, so `frequency`'s default aggregator applies.
552+
"personLikesNumMessages" -> Factor(PersonType, PersonLikesCommentType, PersonLikesPostType) { case Seq(person, personLikesComment, personLikesPost) =>
553+
val personLikesMessage =
554+
personLikesComment.select($"PersonId", $"CommentId".as("MessageId")) |+|
555+
personLikesPost.select($"PersonId", $"PostId".as("MessageId"))
556+
557+
val messages = person
558+
.as("Person")
559+
.join(personLikesMessage.as("personLikesMessage"), $"personLikesMessage.PersonId" === $"Person.id", "leftouter")
560+
561+
val personLikesNumMessages = frequency(messages, value = $"personLikesMessage.MessageId", by = Seq($"Person.id"))
562+
personLikesNumMessages
563+
},
564+
// tags
// Per person: the number of interest tags (`numDirectTags`) and the sum of the friends' tag
// counts, obtained by joining the per-person counts to themselves through the `knows` edges.
// The direct count is keyed by `Person.id`, like the sibling factors. Keying by the
// interest-side `PersonId` would be wrong after the left outer join: that column is null for
// persons without interests, so they would all merge into a single null-keyed row.
// NOTE(review): the final aggregate is returned without a renaming `.select`, i.e. under the
// column name `frequency` produces (read as `frequency` in the first step).
565+
"personNumFriendTags" -> Factor(PersonType, PersonHasInterestTagType, PersonKnowsPersonType) { case Seq(person, interest, personKnowsPerson) =>
566+
// direct tags
567+
val personTags = person
568+
.as("Person")
569+
.join(interest.as("interest"), $"interest.PersonId" === $"Person.id", "leftouter")
570+
571+
val numPersonTags = frequency(personTags, value = $"TagId", by = Seq($"Person.id"), agg = count)
572+
.select($"Person.id".as("Person1Id"), $"frequency".as("numDirectTags"))
573+
574+
// tags of friends
575+
val friendTags = numPersonTags.as("numPersonTags1")
576+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numPersonTags1.Person1Id" === $"knows.Person1Id", "leftouter")
577+
.join(numPersonTags.as("numPersonTags2"), $"numPersonTags2.Person1Id" === $"knows.Person2Id", "leftouter")
578+
579+
val numFriendTags = frequency(
580+
friendTags,
581+
value = $"numPersonTags2.numDirectTags",
582+
by = Seq($"numPersonTags1.Person1Id", $"numPersonTags1.numDirectTags"),
583+
agg = sum
584+
)
585+
numFriendTags
586+
},
587+
// forums
// Per person, in three steps: own forum-membership count (`numDirectForums`), the sum of the
// friends' counts (`numFriendForums`), and finally the sum of the friends' `numFriendForums`
// values. Each step joins the previous step's result to itself through the `knows` edges.
// NOTE(review): the last aggregate is returned without a renaming `.select`, i.e. under the
// column name `frequency` produces (read as `frequency` in the earlier steps).
// NOTE(review): these are sums of per-person counts, so a forum shared by several friends is
// presumably counted once per friend — confirm that distinct forums are not required.
588+
"personNumFriendOfFriendForums" -> Factor(PersonType, ForumHasMemberType, PersonKnowsPersonType) { case Seq(person, hasMember, personKnowsPerson) =>
589+
// direct forums
590+
val directForums = person.as("Person")
591+
.join(hasMember.as("hasMember"), $"hasMember.PersonId" === $"Person.id", "leftouter")
592+
593+
val numForums = frequency(directForums, value = $"ForumId", by = Seq($"Person.id"), agg = count)
594+
.select($"Person.id".as("Person1Id"), $"frequency".as("numDirectForums"))
595+
596+
val friendForums = numForums.as("numForums1")
597+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numForums1.Person1Id" === $"knows.Person1Id", "leftouter")
598+
.join(numForums.as("numForums2"), $"numForums2.Person1Id" === $"knows.Person2Id", "leftouter")
599+
600+
// forums of friends
601+
val numFriendForums = frequency(
602+
friendForums,
603+
value = $"numForums2.numDirectForums",
604+
by = Seq($"numForums1.Person1Id", $"numForums1.numDirectForums"),
605+
agg = sum
606+
).select($"numForums1.Person1Id".as("Person1Id"), $"numForums1.numDirectForums", $"frequency".as("numFriendForums"))
607+
608+
// forums of friends of friends
609+
val friendOfFriendForums = numFriendForums.as("numFriendForums1")
610+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendForums1.Person1Id" === $"knows.Person1Id", "leftouter")
611+
.join(numFriendForums.as("numFriendForums2"), $"numFriendForums2.Person1Id" === $"knows.Person2Id", "leftouter")
612+
613+
val numFriendOfFriendForums = frequency(
614+
friendOfFriendForums,
615+
value = $"numFriendForums2.numFriendForums",
616+
by = Seq($"numFriendForums1.Person1Id", $"numFriendForums1.numDirectForums", $"numFriendForums1.numFriendForums"),
617+
agg = sum
618+
)
619+
numFriendOfFriendForums
620+
},
621+
// companies
// Per person, in three steps: own work-at company count (`numDirectCompanies`), the sum of the
// friends' counts (`numFriendCompanies`), and finally the sum of the friends'
// `numFriendCompanies` values. Each step joins the previous step's result to itself through
// the `knows` edges.
// NOTE(review): the last aggregate is returned without a renaming `.select`, i.e. under the
// column name `frequency` produces (read as `frequency` in the earlier steps).
// NOTE(review): these are sums of per-person counts, so a company shared by several friends is
// presumably counted once per friend — confirm that distinct companies are not required.
622+
"personNumFriendOfFriendCompanies" -> Factor(PersonType, PersonWorkAtCompanyType, PersonKnowsPersonType) { case Seq(person, workAt, personKnowsPerson) =>
623+
// direct companies
624+
val directCompanies = person.as("Person")
625+
.join(workAt.as("workAt"), $"workAt.PersonId" === $"Person.id", "leftouter")
626+
627+
val numCompanies = frequency(directCompanies, value = $"CompanyId", by = Seq($"Person.id"), agg = count)
628+
.select($"Person.id".as("Person1Id"), $"frequency".as("numDirectCompanies"))
629+
630+
val friendCompanies = numCompanies.as("numCompanies1")
631+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numCompanies1.Person1Id" === $"knows.Person1Id", "leftouter")
632+
.join(numCompanies.as("numCompanies2"), $"numCompanies2.Person1Id" === $"knows.Person2Id", "leftouter")
633+
634+
// companies of friends
635+
val numFriendCompanies = frequency(
636+
friendCompanies,
637+
value = $"numCompanies2.numDirectCompanies",
638+
by = Seq($"numCompanies1.Person1Id", $"numCompanies1.numDirectCompanies"),
639+
agg = sum
640+
).select($"numCompanies1.Person1Id".as("Person1Id"), $"numCompanies1.numDirectCompanies", $"frequency".as("numFriendCompanies"))
641+
642+
// companies of friends of friends
643+
val friendOfFriendCompanies = numFriendCompanies.as("numFriendCompanies1")
644+
.join(undirectedKnows(personKnowsPerson).as("knows"), $"numFriendCompanies1.Person1Id" === $"knows.Person1Id", "leftouter")
645+
.join(numFriendCompanies.as("numFriendCompanies2"), $"numFriendCompanies2.Person1Id" === $"knows.Person2Id", "leftouter")
646+
647+
val numFriendOfFriendCompanies = frequency(
648+
friendOfFriendCompanies,
649+
value = $"numFriendCompanies2.numFriendCompanies",
650+
by = Seq($"numFriendCompanies1.Person1Id", $"numFriendCompanies1.numDirectCompanies", $"numFriendCompanies1.numFriendCompanies"),
651+
agg = sum
652+
)
653+
numFriendOfFriendCompanies
654+
},
478655
)
479656
}

0 commit comments

Comments
 (0)