Skip to content

Commit 6bf539a

Browse files
committed
feat: track bot review overrides and reviewer load
1 parent f75f3de commit 6bf539a

File tree

6 files changed

+218
-26
lines changed

6 files changed

+218
-26
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -147,9 +147,9 @@ Example ingestion payload:
147147

148148
## Agent Insights & Analytics
149149

150-
- `/v1/analytics/summary` now surfaces GitHub-aware metrics alongside the existing risk/volume suite: `code_volume`, `code_churn_rate`, `avg_line_complexity`, `agent_response_rate`, `agent_response_p50_hours`, `agent_response_p90_hours`, `reopened_threads`, `force_push_events`, `rewrite_loops`, `human_followup_commits`, `human_followup_fast`, `ci_time_to_green_hours`, `ci_failed_checks`, `agent_commit_ratio`, `commit_lead_time_hours`, `force_push_after_approval`, `human_reviewer_count`, `avg_human_reviewers`, `avg_unique_reviewers`, and `classification_<label>_count` (e.g., `classification_security_count`).
150+
- `/v1/analytics/summary` now surfaces GitHub-aware metrics alongside the existing risk/volume suite: `code_volume`, `code_churn_rate`, `avg_line_complexity`, `agent_response_rate`, `agent_response_p50_hours`, `agent_response_p90_hours`, `reopened_threads`, `force_push_events`, `rewrite_loops`, `human_followup_commits`, `human_followup_fast`, `ci_time_to_green_hours`, `ci_failed_checks`, `agent_commit_ratio`, `commit_lead_time_hours`, `force_push_after_approval`, `human_reviewer_count`, `avg_human_reviewers`, `avg_unique_reviewers`, `bot_review_events`, `bot_block_events`, `bot_block_overrides`, `bot_block_resolved`, `bot_reviewer_count`, `bot_informational_only_reviewer_count`, `bot_comment_count`, and `classification_<label>_count` (e.g., `classification_security_count`).
151151
- `/v1/analytics/agents/behavior` returns composite snapshots that now blend code/finding metrics with review conversation health (thread counts, response latency, classification breakdowns), CI friction (failures, time-to-green), commit dynamics (force pushes, rewrite loops, human follow-ups), and attention heatmaps (top paths + hot files) per agent.
152-
- Snapshots also include reviewer cohort context (`human_reviewer_count`, association breakdowns), provenance anomalies (`force_push_after_approval_count`), and CI failure taxonomy (failing check names and contexts) to highlight operational hotspots.
152+
- Snapshots also include reviewer cohort context (`human_reviewer_count`, association breakdowns), bot review behavior (`bot_block_events`, `bot_block_overrides`), provenance anomalies (`force_push_after_approval_count`), and CI failure taxonomy (failing check names and contexts) to highlight operational hotspots.
153153
- Review-focused metrics (`review_comments`, `unique_reviewers`, `review_events`, `agent_comment_mentions`) continue to leverage GitHub PR data when credentials are supplied; classification metrics reflect the resolver's heuristic labeling of each conversation snippet.
154154
- Use `PROVENANCE_ANALYTICS_DEFAULT_WINDOW` or query parameters such as `?time_window=14d` to track longer horizons and compare agents.
155155

app/models/analytics.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,16 @@ class AgentBehaviorSnapshot(BaseModel):
7070
reviewer_association_breakdown: dict[str, int] = Field(
7171
default_factory=dict, description="Reviewer participation by GitHub association (member, contributor, etc.)."
7272
)
73+
bot_review_events: int = Field(0, description="Total bot-authored review submissions.")
74+
bot_block_events: int = Field(0, description="Bot reviews that requested changes.")
75+
bot_informational_events: int = Field(0, description="Bot reviews that left non-blocking feedback.")
76+
bot_approval_events: int = Field(0, description="Bot approvals recorded in the window.")
77+
bot_block_overrides: int = Field(0, description="Bot change requests overridden by merge without subsequent approval.")
78+
bot_block_resolved: int = Field(0, description="Bot change requests later satisfied by bot approval/dismissal.")
79+
bot_reviewer_count: int = Field(0, description="Unique bot reviewers participating.")
80+
bot_blocking_reviewer_count: int = Field(0, description="Unique bots that issued blocking reviews.")
81+
bot_informational_only_reviewer_count: int = Field(0, description="Bots that only left informational comments.")
82+
bot_comment_count: int = Field(0, description="Bot-authored review comments captured in conversations.")
7383
ci_run_count: int = Field(0, description="Number of CI runs/checks evaluated.")
7484
ci_failure_count: int = Field(0, description="Number of failing CI runs/checks.")
7585
ci_failed_checks: int = Field(0, description="Unique failing CI checks in the window.")

app/provenance/github_resolver.py

Lines changed: 119 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ class ThreadEvent:
2323
author: str | None
2424
created_at: datetime | None
2525
is_agent: bool
26+
is_bot: bool = False
2627

2728

2829
@dataclass
@@ -33,6 +34,7 @@ class ConversationComments:
3334
unique_reviewers: set[str]
3435
agent_mentions: int
3536
reviewer_identities: dict[str, dict] = field(default_factory=dict)
37+
bot_comment_count: int = 0
3638

3739

3840
@dataclass
@@ -45,6 +47,11 @@ class ConversationReviews:
4547
first_review_time: datetime | None
4648
first_approval_time: datetime | None
4749
reviewer_identities: dict[str, dict] = field(default_factory=dict)
50+
bot_review_events: int = 0
51+
bot_block_events: int = 0
52+
bot_informational_events: int = 0
53+
bot_approval_events: int = 0
54+
bot_block_reviews: list[dict] = field(default_factory=list)
4855

4956

5057
class GitHubProvenanceResolver:
@@ -164,6 +171,44 @@ def collect_pr_metadata(
164171
if first_approval_at and merged_at:
165172
conversation_summary["approval_to_merge_hours"] = self._hours_between(first_approval_at, merged_at)
166173

174+
reviews_list = conversation.get("reviews", [])
175+
bot_reviewers: set[str] = set()
176+
bot_blocking_reviewers: set[str] = set()
177+
bot_block_overrides = 0
178+
bot_block_resolved = 0
179+
for review_entry in reviews_list:
180+
if not review_entry.get("is_bot"):
181+
continue
182+
login = review_entry.get("author")
183+
if login:
184+
bot_reviewers.add(login)
185+
if review_entry.get("state") != "CHANGES_REQUESTED":
186+
continue
187+
if login:
188+
bot_blocking_reviewers.add(login)
189+
submitted_at = self._parse_iso(review_entry.get("submitted_at"))
190+
resolved = False
191+
for later_entry in reviews_list:
192+
if later_entry is review_entry:
193+
continue
194+
if later_entry.get("author") != login or not later_entry.get("is_bot"):
195+
continue
196+
later_time = self._parse_iso(later_entry.get("submitted_at"))
197+
if submitted_at and later_time and later_time > submitted_at:
198+
if later_entry.get("state") in {"APPROVED", "DISMISSED"}:
199+
resolved = True
200+
break
201+
if resolved:
202+
bot_block_resolved += 1
203+
elif merged_at:
204+
bot_block_overrides += 1
205+
206+
conversation_summary["bot_reviewer_count"] = len(bot_reviewers)
207+
conversation_summary["bot_blocking_reviewer_count"] = len(bot_blocking_reviewers)
208+
conversation_summary["bot_informational_only_reviewer_count"] = len(bot_reviewers - bot_blocking_reviewers)
209+
conversation_summary["bot_block_overrides"] = bot_block_overrides
210+
conversation_summary["bot_block_resolved"] = bot_block_resolved
211+
167212
commit_summary = self._summarize_commits(
168213
pr,
169214
agent_logins,
@@ -439,6 +484,11 @@ def _build_conversation_snapshot(self, pr, agent_logins: set[str]) -> dict:
439484
"classification_breakdown": dict(sorted(classification_counts.items(), key=lambda item: item[1], reverse=True)),
440485
"agent_response_rate": thread_metrics["response_rate"],
441486
"reviewer_profiles": list(reviewer_identities.values()),
487+
"bot_comment_count": comments_info.bot_comment_count,
488+
"bot_review_events": reviews_info.bot_review_events,
489+
"bot_block_events": reviews_info.bot_block_events,
490+
"bot_informational_events": reviews_info.bot_informational_events,
491+
"bot_approval_events": reviews_info.bot_approval_events,
442492
}
443493

444494
if thread_metrics["response_latencies"]:
@@ -476,32 +526,35 @@ def _summarize_comments(
476526
unique_reviewers: set[str] = set()
477527
agent_mentions = 0
478528
reviewer_identities: dict[str, dict] = {}
529+
bot_comment_count = 0
479530

480-
for comment in issue_comments:
531+
def _handle_comment(comment, comment_type: str) -> None:
532+
nonlocal agent_mentions, bot_comment_count
533+
author_login = getattr(getattr(comment, "user", None), "login", None)
534+
profile = self._extract_user_profile(comment, author_login)
535+
is_bot = self._is_bot_user(author_login, profile)
481536
entry, thread_key, event, classification = self._serialize_comment(
482-
comment, "issue_comment", agent_logins, len(serialized)
537+
comment,
538+
comment_type,
539+
agent_logins,
540+
len(serialized),
541+
is_bot=is_bot,
483542
)
484543
serialized.append(entry)
485544
thread_events[thread_key].append(event)
486545
classification_counts[classification] += 1
487546
if event.author and not event.is_agent:
488547
unique_reviewers.add(event.author)
489-
reviewer_identities.setdefault(event.author, self._extract_user_profile(comment, event.author))
548+
reviewer_identities.setdefault(event.author, profile)
490549
if AGENT_TRAILER_PATTERN.search(entry["body"]):
491550
agent_mentions += 1
551+
if is_bot:
552+
bot_comment_count += 1
492553

554+
for comment in issue_comments:
555+
_handle_comment(comment, "issue_comment")
493556
for comment in review_comments:
494-
entry, thread_key, event, classification = self._serialize_comment(
495-
comment, "review_comment", agent_logins, len(serialized)
496-
)
497-
serialized.append(entry)
498-
thread_events[thread_key].append(event)
499-
classification_counts[classification] += 1
500-
if event.author and not event.is_agent:
501-
unique_reviewers.add(event.author)
502-
reviewer_identities.setdefault(event.author, self._extract_user_profile(comment, event.author))
503-
if AGENT_TRAILER_PATTERN.search(entry["body"]):
504-
agent_mentions += 1
557+
_handle_comment(comment, "review_comment")
505558

506559
normalized_threads = {key: events[:] for key, events in thread_events.items()}
507560
return ConversationComments(
@@ -511,6 +564,7 @@ def _summarize_comments(
511564
unique_reviewers=unique_reviewers,
512565
agent_mentions=agent_mentions,
513566
reviewer_identities=reviewer_identities,
567+
bot_comment_count=bot_comment_count,
514568
)
515569

516570
def _summarize_reviews(
@@ -526,24 +580,31 @@ def _summarize_reviews(
526580
first_review_time: datetime | None = None
527581
first_approval_time: datetime | None = None
528582
reviewer_identities: dict[str, dict] = {}
583+
bot_review_events = 0
584+
bot_block_events = 0
585+
bot_informational_events = 0
586+
bot_approval_events = 0
587+
bot_block_reviews: list[dict] = []
529588

530589
for review in reviews:
531590
submitted_at = getattr(review, "submitted_at", None)
532591
author_login = getattr(getattr(review, "user", None), "login", None)
592+
profile = self._extract_user_profile(review, author_login)
593+
is_bot = self._is_bot_user(author_login, profile)
533594
classification = self._classify_review(review.state, review.body or "")
534-
entries.append(
535-
{
536-
"author": author_login,
537-
"state": review.state,
538-
"submitted_at": self._coerce_iso(submitted_at),
539-
"body": review.body or "",
540-
"classification": classification,
541-
}
542-
)
595+
entry = {
596+
"author": author_login,
597+
"state": review.state,
598+
"submitted_at": self._coerce_iso(submitted_at),
599+
"body": review.body or "",
600+
"classification": classification,
601+
"is_bot": is_bot,
602+
}
603+
entries.append(entry)
543604
classification_counts[classification] += 1
544605
if author_login and not self._is_agent_login(author_login, agent_logins):
545606
unique_reviewers.add(author_login)
546-
reviewer_identities.setdefault(author_login, self._extract_user_profile(review, author_login))
607+
reviewer_identities.setdefault(author_login, profile)
547608
if submitted_at and (first_review_time is None or submitted_at < first_review_time):
548609
first_review_time = submitted_at
549610
if review.state == "APPROVED":
@@ -553,6 +614,16 @@ def _summarize_reviews(
553614
if review.state == "CHANGES_REQUESTED":
554615
requested_changes += 1
555616

617+
if is_bot:
618+
bot_review_events += 1
619+
if review.state == "CHANGES_REQUESTED":
620+
bot_block_events += 1
621+
bot_block_reviews.append(entry)
622+
elif review.state == "APPROVED":
623+
bot_approval_events += 1
624+
else:
625+
bot_informational_events += 1
626+
556627
return ConversationReviews(
557628
entries=entries,
558629
classification_counts=dict(classification_counts),
@@ -562,6 +633,11 @@ def _summarize_reviews(
562633
first_review_time=first_review_time,
563634
first_approval_time=first_approval_time,
564635
reviewer_identities=reviewer_identities,
636+
bot_review_events=bot_review_events,
637+
bot_block_events=bot_block_events,
638+
bot_informational_events=bot_informational_events,
639+
bot_approval_events=bot_approval_events,
640+
bot_block_reviews=bot_block_reviews,
565641
)
566642

567643
def _summarize_threads(
@@ -625,6 +701,8 @@ def _serialize_comment(
625701
comment_type: str,
626702
agent_logins: set[str],
627703
index: int,
704+
*,
705+
is_bot: bool,
628706
) -> tuple[dict, str, ThreadEvent, str]:
629707
body = comment.body or ""
630708
created_at = getattr(comment, "created_at", None)
@@ -639,6 +717,7 @@ def _serialize_comment(
639717
"classification": classification,
640718
"created_at": self._coerce_iso(created_at),
641719
"updated_at": self._coerce_iso(updated_at),
720+
"is_bot": is_bot,
642721
}
643722
if comment_type == "review_comment":
644723
serialized["in_reply_to_id"] = getattr(comment, "in_reply_to_id", None)
@@ -648,6 +727,7 @@ def _serialize_comment(
648727
author=author_login,
649728
created_at=created_at,
650729
is_agent=self._is_agent_login(author_login, agent_logins),
730+
is_bot=is_bot,
651731
)
652732
return serialized, thread_key, event, classification
653733

@@ -687,6 +767,7 @@ def _extract_user_profile(self, source_obj, login: str) -> dict:
687767
profile[attr] = getattr(user, attr, None)
688768
association = getattr(source_obj, "author_association", None)
689769
profile["association"] = association
770+
profile["is_bot"] = self._is_bot_login(login) or (profile.get("type") or "").lower() == "bot"
690771
return profile
691772

692773
@staticmethod
@@ -945,6 +1026,20 @@ def _is_agent_login(self, login: str | None, agent_logins: set[str]) -> bool:
9451026
return True
9461027
return any(keyword in lower for keyword in ("copilot", "claude", "gemini", "gpt", "bard", "llama"))
9471028

1029+
@staticmethod
1030+
def _is_bot_login(login: str | None) -> bool:
1031+
if not login:
1032+
return False
1033+
lower = login.lower()
1034+
return lower.endswith("[bot]") or lower.endswith("-bot") or lower.endswith("/bot")
1035+
1036+
def _is_bot_user(self, login: str | None, profile: dict | None = None) -> bool:
1037+
if profile and (profile.get("type") or "").lower() == "bot":
1038+
return True
1039+
if profile and profile.get("is_bot") is True:
1040+
return True
1041+
return self._is_bot_login(login)
1042+
9481043
@staticmethod
9491044
def _coerce_iso(value) -> str | None:
9501045
if not value:

0 commit comments

Comments
 (0)