evalops
diff --git a/README.md b/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/app/models/analytics.py b/app/models/analytics.py
Lines changed: 10 additions & 0 deletions
diff --git a/app/provenance/github_resolver.py b/app/provenance/github_resolver.py
Lines changed: 119 additions & 24 deletions
@@ -147,9 +147,9 @@ Example ingestion payload:
 
 ## Agent Insights & Analytics
 
-- `/v1/analytics/summary` now surfaces GitHub-aware metrics alongside the existing risk/volume suite: `code_volume`, `code_churn_rate`, `avg_line_complexity`, `agent_response_rate`, `agent_response_p50_hours`, `agent_response_p90_hours`, `reopened_threads`, `force_push_events`, `rewrite_loops`, `human_followup_commits`, `human_followup_fast`, `ci_time_to_green_hours`, `ci_failed_checks`, `agent_commit_ratio`, `commit_lead_time_hours`, `force_push_after_approval`, `human_reviewer_count`, `avg_human_reviewers`, `avg_unique_reviewers`, and `classification_<label>_count` (e.g., `classification_security_count`).
+- `/v1/analytics/summary` now surfaces GitHub-aware metrics alongside the existing risk/volume suite: `code_volume`, `code_churn_rate`, `avg_line_complexity`, `agent_response_rate`, `agent_response_p50_hours`, `agent_response_p90_hours`, `reopened_threads`, `force_push_events`, `rewrite_loops`, `human_followup_commits`, `human_followup_fast`, `ci_time_to_green_hours`, `ci_failed_checks`, `agent_commit_ratio`, `commit_lead_time_hours`, `force_push_after_approval`, `human_reviewer_count`, `avg_human_reviewers`, `avg_unique_reviewers`, `bot_review_events`, `bot_block_events`, `bot_block_overrides`, `bot_block_resolved`, `bot_reviewer_count`, `bot_informational_only_reviewer_count`, `bot_comment_count`, and `classification_<label>_count` (e.g., `classification_security_count`).
 - `/v1/analytics/agents/behavior` returns composite snapshots that now blend code/finding metrics with review conversation health (thread counts, response latency, classification breakdowns), CI friction (failures, time-to-green), commit dynamics (force pushes, rewrite loops, human follow-ups), and attention heatmaps (top paths + hot files) per agent.
-- Snapshots also include reviewer cohort context (`human_reviewer_count`, association breakdowns), provenance anomalies (`force_push_after_approval_count`), and CI failure taxonomy (failing check names and contexts) to highlight operational hotspots.
+- Snapshots also include reviewer cohort context (`human_reviewer_count`, association breakdowns), bot review behavior (`bot_block_events`, `bot_block_overrides`), provenance anomalies (`force_push_after_approval_count`), and CI failure taxonomy (failing check names and contexts) to highlight operational hotspots.
 - Review-focused metrics (`review_comments`, `unique_reviewers`, `review_events`, `agent_comment_mentions`) continue to leverage GitHub PR data when credentials are supplied; classification metrics reflect the resolver's heuristic labeling of each conversation snippet.
 - Use `PROVENANCE_ANALYTICS_DEFAULT_WINDOW` or query parameters such as `?time_window=14d` to track longer horizons and compare agents.
 
 
@@ -70,6 +70,16 @@ class AgentBehaviorSnapshot(BaseModel):
     reviewer_association_breakdown: dict[str, int] = Field(
         default_factory=dict, description="Reviewer participation by GitHub association (member, contributor, etc.)."
     )
+    bot_review_events: int = Field(0, description="Total bot-authored review submissions.")
+    bot_block_events: int = Field(0, description="Bot reviews that requested changes.")
+    bot_informational_events: int = Field(0, description="Bot reviews that left non-blocking feedback.")
+    bot_approval_events: int = Field(0, description="Bot approvals recorded in the window.")
+    bot_block_overrides: int = Field(0, description="Bot change requests overridden by merge without subsequent approval.")
+    bot_block_resolved: int = Field(0, description="Bot change requests later satisfied by bot approval/dismissal.")
+    bot_reviewer_count: int = Field(0, description="Unique bot reviewers participating.")
+    bot_blocking_reviewer_count: int = Field(0, description="Unique bots that issued blocking reviews.")
+    bot_informational_only_reviewer_count: int = Field(0, description="Bots that only left informational comments.")
+    bot_comment_count: int = Field(0, description="Bot-authored review comments captured in conversations.")
     ci_run_count: int = Field(0, description="Number of CI runs/checks evaluated.")
     ci_failure_count: int = Field(0, description="Number of failing CI runs/checks.")
     ci_failed_checks: int = Field(0, description="Unique failing CI checks in the window.")
 
@@ -23,6 +23,7 @@ class ThreadEvent:
     author: str | None
     created_at: datetime | None
     is_agent: bool
+    is_bot: bool = False
 
 
 @dataclass
@@ -33,6 +34,7 @@ class ConversationComments:
     unique_reviewers: set[str]
     agent_mentions: int
     reviewer_identities: dict[str, dict] = field(default_factory=dict)
+    bot_comment_count: int = 0
 
 
 @dataclass
@@ -45,6 +47,11 @@ class ConversationReviews:
     first_review_time: datetime | None
     first_approval_time: datetime | None
     reviewer_identities: dict[str, dict] = field(default_factory=dict)
+    bot_review_events: int = 0
+    bot_block_events: int = 0
+    bot_informational_events: int = 0
+    bot_approval_events: int = 0
+    bot_block_reviews: list[dict] = field(default_factory=list)
 
 
 class GitHubProvenanceResolver:
@@ -164,6 +171,44 @@ def collect_pr_metadata(
         if first_approval_at and merged_at:
             conversation_summary["approval_to_merge_hours"] = self._hours_between(first_approval_at, merged_at)
 
+        reviews_list = conversation.get("reviews", [])
+        bot_reviewers: set[str] = set()
+        bot_blocking_reviewers: set[str] = set()
+        bot_block_overrides = 0
+        bot_block_resolved = 0
+        for review_entry in reviews_list:
+            if not review_entry.get("is_bot"):
+                continue
+            login = review_entry.get("author")
+            if login:
+                bot_reviewers.add(login)
+            if review_entry.get("state") != "CHANGES_REQUESTED":
+                continue
+            if login:
+                bot_blocking_reviewers.add(login)
+            submitted_at = self._parse_iso(review_entry.get("submitted_at"))
+            resolved = False
+            for later_entry in reviews_list:
+                if later_entry is review_entry:
+                    continue
+                if later_entry.get("author") != login or not later_entry.get("is_bot"):
+                    continue
+                later_time = self._parse_iso(later_entry.get("submitted_at"))
+                if submitted_at and later_time and later_time > submitted_at:
+                    if later_entry.get("state") in {"APPROVED", "DISMISSED"}:
+                        resolved = True
+                        break
+            if resolved:
+                bot_block_resolved += 1
+            elif merged_at:
+                bot_block_overrides += 1
+
+        conversation_summary["bot_reviewer_count"] = len(bot_reviewers)
+        conversation_summary["bot_blocking_reviewer_count"] = len(bot_blocking_reviewers)
+        conversation_summary["bot_informational_only_reviewer_count"] = len(bot_reviewers - bot_blocking_reviewers)
+        conversation_summary["bot_block_overrides"] = bot_block_overrides
+        conversation_summary["bot_block_resolved"] = bot_block_resolved
+
         commit_summary = self._summarize_commits(
             pr,
             agent_logins,
@@ -439,6 +484,11 @@ def _build_conversation_snapshot(self, pr, agent_logins: set[str]) -> dict:
             "classification_breakdown": dict(sorted(classification_counts.items(), key=lambda item: item[1], reverse=True)),
             "agent_response_rate": thread_metrics["response_rate"],
             "reviewer_profiles": list(reviewer_identities.values()),
+            "bot_comment_count": comments_info.bot_comment_count,
+            "bot_review_events": reviews_info.bot_review_events,
+            "bot_block_events": reviews_info.bot_block_events,
+            "bot_informational_events": reviews_info.bot_informational_events,
+            "bot_approval_events": reviews_info.bot_approval_events,
         }
 
         if thread_metrics["response_latencies"]:
@@ -476,32 +526,35 @@ def _summarize_comments(
         unique_reviewers: set[str] = set()
         agent_mentions = 0
         reviewer_identities: dict[str, dict] = {}
+        bot_comment_count = 0
 
-        for comment in issue_comments:
+        def _handle_comment(comment, comment_type: str) -> None:
+            nonlocal agent_mentions, bot_comment_count
+            author_login = getattr(getattr(comment, "user", None), "login", None)
+            profile = self._extract_user_profile(comment, author_login)
+            is_bot = self._is_bot_user(author_login, profile)
             entry, thread_key, event, classification = self._serialize_comment(
-                comment, "issue_comment", agent_logins, len(serialized)
+                comment,
+                comment_type,
+                agent_logins,
+                len(serialized),
+                is_bot=is_bot,
             )
             serialized.append(entry)
             thread_events[thread_key].append(event)
             classification_counts[classification] += 1
             if event.author and not event.is_agent:
                 unique_reviewers.add(event.author)
-                reviewer_identities.setdefault(event.author, self._extract_user_profile(comment, event.author))
+                reviewer_identities.setdefault(event.author, profile)
             if AGENT_TRAILER_PATTERN.search(entry["body"]):
                 agent_mentions += 1
+            if is_bot:
+                bot_comment_count += 1
 
+        for comment in issue_comments:
+            _handle_comment(comment, "issue_comment")
         for comment in review_comments:
-            entry, thread_key, event, classification = self._serialize_comment(
-                comment, "review_comment", agent_logins, len(serialized)
-            )
-            serialized.append(entry)
-            thread_events[thread_key].append(event)
-            classification_counts[classification] += 1
-            if event.author and not event.is_agent:
-                unique_reviewers.add(event.author)
-                reviewer_identities.setdefault(event.author, self._extract_user_profile(comment, event.author))
-            if AGENT_TRAILER_PATTERN.search(entry["body"]):
-                agent_mentions += 1
+            _handle_comment(comment, "review_comment")
 
         normalized_threads = {key: events[:] for key, events in thread_events.items()}
         return ConversationComments(
@@ -511,6 +564,7 @@ def _summarize_comments(
             unique_reviewers=unique_reviewers,
             agent_mentions=agent_mentions,
             reviewer_identities=reviewer_identities,
+            bot_comment_count=bot_comment_count,
         )
 
     def _summarize_reviews(
@@ -526,24 +580,31 @@ def _summarize_reviews(
         first_review_time: datetime | None = None
         first_approval_time: datetime | None = None
         reviewer_identities: dict[str, dict] = {}
+        bot_review_events = 0
+        bot_block_events = 0
+        bot_informational_events = 0
+        bot_approval_events = 0
+        bot_block_reviews: list[dict] = []
 
         for review in reviews:
             submitted_at = getattr(review, "submitted_at", None)
             author_login = getattr(getattr(review, "user", None), "login", None)
+            profile = self._extract_user_profile(review, author_login)
+            is_bot = self._is_bot_user(author_login, profile)
             classification = self._classify_review(review.state, review.body or "")
-            entries.append(
-                {
-                    "author": author_login,
-                    "state": review.state,
-                    "submitted_at": self._coerce_iso(submitted_at),
-                    "body": review.body or "",
-                    "classification": classification,
-                }
-            )
+            entry = {
+                "author": author_login,
+                "state": review.state,
+                "submitted_at": self._coerce_iso(submitted_at),
+                "body": review.body or "",
+                "classification": classification,
+                "is_bot": is_bot,
+            }
+            entries.append(entry)
             classification_counts[classification] += 1
             if author_login and not self._is_agent_login(author_login, agent_logins):
                 unique_reviewers.add(author_login)
-                reviewer_identities.setdefault(author_login, self._extract_user_profile(review, author_login))
+                reviewer_identities.setdefault(author_login, profile)
             if submitted_at and (first_review_time is None or submitted_at < first_review_time):
                 first_review_time = submitted_at
             if review.state == "APPROVED":
@@ -553,6 +614,16 @@ def _summarize_reviews(
             if review.state == "CHANGES_REQUESTED":
                 requested_changes += 1
 
+            if is_bot:
+                bot_review_events += 1
+                if review.state == "CHANGES_REQUESTED":
+                    bot_block_events += 1
+                    bot_block_reviews.append(entry)
+                elif review.state == "APPROVED":
+                    bot_approval_events += 1
+                else:
+                    bot_informational_events += 1
+
         return ConversationReviews(
             entries=entries,
             classification_counts=dict(classification_counts),
@@ -562,6 +633,11 @@ def _summarize_reviews(
             first_review_time=first_review_time,
             first_approval_time=first_approval_time,
             reviewer_identities=reviewer_identities,
+            bot_review_events=bot_review_events,
+            bot_block_events=bot_block_events,
+            bot_informational_events=bot_informational_events,
+            bot_approval_events=bot_approval_events,
+            bot_block_reviews=bot_block_reviews,
         )
 
     def _summarize_threads(
@@ -625,6 +701,8 @@ def _serialize_comment(
         comment_type: str,
         agent_logins: set[str],
         index: int,
+        *,
+        is_bot: bool,
     ) -> tuple[dict, str, ThreadEvent, str]:
         body = comment.body or ""
         created_at = getattr(comment, "created_at", None)
@@ -639,6 +717,7 @@ def _serialize_comment(
             "classification": classification,
             "created_at": self._coerce_iso(created_at),
             "updated_at": self._coerce_iso(updated_at),
+            "is_bot": is_bot,
         }
         if comment_type == "review_comment":
             serialized["in_reply_to_id"] = getattr(comment, "in_reply_to_id", None)
@@ -648,6 +727,7 @@ def _serialize_comment(
             author=author_login,
             created_at=created_at,
             is_agent=self._is_agent_login(author_login, agent_logins),
+            is_bot=is_bot,
         )
         return serialized, thread_key, event, classification
 
@@ -687,6 +767,7 @@ def _extract_user_profile(self, source_obj, login: str) -> dict:
             profile[attr] = getattr(user, attr, None)
         association = getattr(source_obj, "author_association", None)
         profile["association"] = association
+        profile["is_bot"] = self._is_bot_login(login) or (profile.get("type") or "").lower() == "bot"
         return profile
 
     @staticmethod
@@ -945,6 +1026,20 @@ def _is_agent_login(self, login: str | None, agent_logins: set[str]) -> bool:
             return True
         return any(keyword in lower for keyword in ("copilot", "claude", "gemini", "gpt", "bard", "llama"))
 
+    @staticmethod
+    def _is_bot_login(login: str | None) -> bool:
+        """Return True when ``login`` ends with a recognised bot suffix.
+
+        The comparison is case-insensitive and accepts the suffixes ``[bot]``,
+        ``-bot`` and ``/bot``. A ``None`` or empty login is never a bot.
+        """
+        if not login:
+            return False
+        lower = login.lower()
+        return lower.endswith("[bot]") or lower.endswith("-bot") or lower.endswith("/bot")
+
+    def _is_bot_user(self, login: str | None, profile: dict | None = None) -> bool:
+        """Decide whether an author is a bot from its profile and login.
+
+        Args:
+            login: Author login; may be ``None``.
+            profile: Optional profile dict. Only its ``type`` and ``is_bot``
+                keys are consulted, and only when the dict is non-empty.
+
+        Returns:
+            True if the profile's ``type`` equals ``"bot"`` ignoring case, if
+            the profile's ``is_bot`` value is exactly ``True``, or if
+            ``_is_bot_login`` accepts the login; False otherwise.
+        """
+        # A profile "type" of "bot" in any letter case is sufficient on its own.
+        if profile and (profile.get("type") or "").lower() == "bot":
+            return True
+        # Identity check: only the literal True counts, not other truthy values.
+        if profile and profile.get("is_bot") is True:
+            return True
+        # No usable profile signal: fall back to the login-suffix heuristic.
+        return self._is_bot_login(login)
+
     @staticmethod
     def _coerce_iso(value) -> str | None:
         if not value: