Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 62 additions & 15 deletions src/sentry/models/releases/set_commits.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,8 @@ def update_group_resolutions(release, commit_author_by_commit):


def create_commit_authors(commit_list, release):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Up to you but worth adding a test to verify the batching behavior or anything else related to commit authors?

authors = {}

# Collect unique emails and their author data
author_data_by_email = {}
for data in commit_list:
author_email = data.get("author_email")
if author_email is None and data.get("author_name"):
Expand All @@ -293,23 +293,70 @@ def create_commit_authors(commit_list, release):
)

author_email = truncatechars(author_email, 75)
if author_email:
# Lowercase to match CommitAuthorManager.get_or_create behavior
author_email = author_email.lower()

# Store normalized email back in data for later lookup
data["_normalized_email"] = author_email

if author_email and author_email not in author_data_by_email:
author_data_by_email[author_email] = {"name": data.get("author_name")}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can the above section be compressed into the below?

author_email = truncatechars(author_email, 75)
if author_email:
    # Lowercase to match CommitAuthorManager.get_or_create behavior
    author_email = author_email.lower()

    # Store normalized email back in data for later lookup
    data["_normalized_email"] = author_email

    if author_email not in author_data_by_email:
        author_data_by_email[author_email] = {"name": data.get("author_name")}


if not author_data_by_email:
# No authors to process, set all to None
for data in commit_list:
data["author_model"] = None
return

# Batch fetch existing authors
existing_authors = {
author.email: author
for author in CommitAuthor.objects.filter(
organization_id=release.organization_id,
email__in=author_data_by_email.keys(),
)
}

if not author_email:
author = None
elif author_email not in authors:
author_data = {"name": data.get("author_name")}
author, created = CommitAuthor.objects.get_or_create(
# Identify authors needing creation
emails_to_create = set(author_data_by_email.keys()) - set(existing_authors.keys())

if emails_to_create:
# Batch create missing authors
authors_to_create = [
CommitAuthor(
organization_id=release.organization_id,
email=author_email,
defaults=author_data,
email=email,
name=author_data_by_email[email]["name"],
)
if author.name != author_data["name"]:
author.update(name=author_data["name"])
authors[author_email] = author
else:
author = authors[author_email]
for email in emails_to_create
]
CommitAuthor.objects.bulk_create(authors_to_create, ignore_conflicts=True)

data["author_model"] = author
# Re-fetch to get IDs (needed because bulk_create with ignore_conflicts
# doesn't populate IDs on PostgreSQL for conflicting rows)
newly_created = CommitAuthor.objects.filter(
organization_id=release.organization_id,
email__in=emails_to_create,
)
for author in newly_created:
existing_authors[author.email] = author

# Batch update names where needed
authors_to_update = []
for email, author in existing_authors.items():
expected_name = author_data_by_email[email]["name"]
if author.name != expected_name:
author.name = expected_name
authors_to_update.append(author)

if authors_to_update:
CommitAuthor.objects.bulk_update(authors_to_update, ["name"])

# Assign author models to commit data
for data in commit_list:
author_email = data.pop("_normalized_email", None)
data["author_model"] = existing_authors.get(author_email) if author_email else None


def create_repositories(commit_list, release):
Expand Down
Loading