Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 62 additions & 15 deletions src/sentry/models/releases/set_commits.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,8 @@ def update_group_resolutions(release, commit_author_by_commit):


def create_commit_authors(commit_list, release):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Up to you but worth adding a test to verify the batching behavior or anything else related to commit authors?

authors = {}

# Collect unique emails and their author data
author_data_by_email = {}
for data in commit_list:
author_email = data.get("author_email")
if author_email is None and data.get("author_name"):
Expand All @@ -293,23 +293,70 @@ def create_commit_authors(commit_list, release):
)

author_email = truncatechars(author_email, 75)
if author_email:
# Lowercase to match CommitAuthorManager.get_or_create behavior
author_email = author_email.lower()

# Store normalized email back in data for later lookup
data["_normalized_email"] = author_email

if author_email not in author_data_by_email:
author_data_by_email[author_email] = {"name": data.get("author_name")}

if not author_data_by_email:
# No authors to process, set all to None
for data in commit_list:
data["author_model"] = None
return

# Batch fetch existing authors
existing_authors = {
author.email: author
for author in CommitAuthor.objects.filter(
organization_id=release.organization_id,
email__in=author_data_by_email.keys(),
)
}

if not author_email:
author = None
elif author_email not in authors:
author_data = {"name": data.get("author_name")}
author, created = CommitAuthor.objects.get_or_create(
# Identify authors needing creation
emails_to_create = set(author_data_by_email.keys()) - set(existing_authors.keys())

if emails_to_create:
# Batch create missing authors
authors_to_create = [
CommitAuthor(
organization_id=release.organization_id,
email=author_email,
defaults=author_data,
email=email,
name=author_data_by_email[email]["name"],
)
if author.name != author_data["name"]:
author.update(name=author_data["name"])
authors[author_email] = author
else:
author = authors[author_email]
for email in emails_to_create
]
CommitAuthor.objects.bulk_create(authors_to_create, ignore_conflicts=True)

data["author_model"] = author
# Re-fetch to get IDs (needed because bulk_create with ignore_conflicts
# doesn't populate IDs on PostgreSQL for conflicting rows)
newly_created = CommitAuthor.objects.filter(
organization_id=release.organization_id,
email__in=emails_to_create,
)
for author in newly_created:
existing_authors[author.email] = author

# Batch update names where needed
authors_to_update = []
for email, author in existing_authors.items():
expected_name = author_data_by_email[email]["name"]
if author.name != expected_name:
author.name = expected_name
authors_to_update.append(author)

if authors_to_update:
CommitAuthor.objects.bulk_update(authors_to_update, ["name"])

# Assign author models to commit data
for data in commit_list:
author_email = data.pop("_normalized_email", None)
data["author_model"] = existing_authors.get(author_email) if author_email else None


def create_repositories(commit_list, release):
Expand Down
78 changes: 78 additions & 0 deletions tests/sentry/models/test_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,84 @@ def test_long_email(self) -> None:
assert commit.author is not None
assert commit.author.email == truncatechars(commit_email, 75)

@receivers_raise_on_send()
def test_multiple_authors(self) -> None:
"""Test that multiple unique authors are created and existing authors are reused."""
org = self.create_organization(owner=Factories.create_user())
project = self.create_project(organization=org, name="foo")
repo = Repository.objects.create(organization_id=org.id, name="test/repo")

# Pre-create one author to test reuse
existing_author = CommitAuthor.objects.create(
organization_id=org.id,
email="[email protected]",
name="Old Name",
)

release = Release.objects.create(version="abcdabc", organization=org)
release.add_project(project)
release.set_commits(
[
{
"id": "a" * 40,
"repository": repo.name,
"author_email": "[email protected]",
"author_name": "Author One",
"message": "commit 1",
},
{
"id": "b" * 40,
"repository": repo.name,
"author_email": "[email protected]",
"author_name": "Author Two",
"message": "commit 2",
},
{
"id": "c" * 40,
"repository": repo.name,
"author_email": "[email protected]", # Test case-insensitive reuse
"author_name": "New Name", # Test name update
"message": "commit 3",
},
{
"id": "d" * 40,
"repository": repo.name,
# No author_email - should handle gracefully
"message": "commit 4",
},
]
)

# Verify new authors were created
assert CommitAuthor.objects.filter(
organization_id=org.id, email="[email protected]"
).exists()
assert CommitAuthor.objects.filter(
organization_id=org.id, email="[email protected]"
).exists()

# Verify existing author was reused (not duplicated) and name was updated
assert (
CommitAuthor.objects.filter(
organization_id=org.id, email="[email protected]"
).count()
== 1
)
existing_author.refresh_from_db()
assert existing_author.name == "New Name"

# Verify commits have correct authors
commit_a = Commit.objects.get(repository_id=repo.id, key="a" * 40)
assert commit_a.author is not None
assert commit_a.author.email == "[email protected]"

commit_c = Commit.objects.get(repository_id=repo.id, key="c" * 40)
assert commit_c.author is not None
assert commit_c.author.id == existing_author.id

commit_d = Commit.objects.get(repository_id=repo.id, key="d" * 40)
assert commit_d.author is None


class SetRefsTest(SetRefsTestCase):
def setUp(self) -> None:
Expand Down
Loading