feat: add Pi-Agent detection via process name and breadcrumb scanning (#21)

comp615 · ampcode-com · web-flow · commit 61d117eb188e · 2026-03-08T05:46:56.000+11:00
_This PR description was generated by AMP._ 🤖 ## Summary Adds detection for three new AI agents and fixes a false-positive matching issue for short process names. ## Changes ### New agents (`src/agent.rs`) **Pi-Agent** — process + breadcrumbs: - Process: `pi` (exact match only — see below) - Breadcrumbs: `~/.pi/agent/sessions/*.jsonl` - Email: `Pi <noreply@pi.dev>` - ℹ️ Pi is a Node.js CLI — `process.name()` returns `"node"`, not `"pi"`. Detection works via `basename(argv[0])` which is `"pi"` (npm bin shim). [Verified on a live Pi session](#21). **Copilot CLI** — process + breadcrumbs: - Process: `copilot` (distinct from `copilot-agent` VS Code extension) - Breadcrumbs: `~/.copilot/session-state/{session-id}/events.jsonl` - Email: `Copilot <223556219+Copilot@users.noreply.github.com>` ([verified](github/copilot-cli#975)) - ⚠️ Placed after `copilot-agent` — `find_by_name` uses `contains()` so the more specific entry must match first **OpenCode** — process only: - Process: `opencode` - Email: `opencode <noreply@opencode.ai>` ([verified](anomalyco/opencode#786)) - No breadcrumbs — sessions stored in SQLite (`~/.local/share/opencode/opencode.db`), TODO for future ### Bug fix: `exact_process_match` flag Added `exact_process_match: bool` to the `Agent` struct. When `true`, `find_by_name` requires the basename to equal the process name exactly instead of using `contains()`. **Problem**: The 2-character process name `pi` caused false positives — e.g., bash's `argv[1]` containing `pipefail` would match `"pipefail".contains("pi")`. **Fix**: Pi-Agent is the only agent with `exact_process_match: true`. All other agents retain the default `false` (backward compatible). Also added `Agent::default()` const fn so entries in `KNOWN_AGENTS` only need to specify fields that differ from defaults, keeping the list concise. 
### `README.md` - Updated breadcrumb examples and limitations to include Pi, Copilot CLI, and OpenCode ## Testing ```bash just run --debug # Verify Pi no longer false-positive matches on "pipefail" etc. just run # Only detects actually running agents ``` --------- Co-authored-by: Amp <amp@ampcode.com>
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ It finds agents in four ways:
 1. It checks for agent-specific environment variables.
 2. It walks its own process ancestry, under the assumption that the git commit was initiated by an agent.
 3. It walks up the process tree and checks all descendants of siblings at each level, looking for agents working in the same repository.
-4. It checks agent-specific state files ("breadcrumbs") to determine if an agent was recently active in this repo (e.g. `~/.claude/projects/`, `~/.codex/sessions/`).
+4. It checks agent-specific state files ("breadcrumbs") to determine if an agent was recently active in this repo (e.g. `~/.claude/projects/`, `~/.codex/sessions/`, `~/.pi/agent/sessions/`).
 
 Multiple agents can be attributed in a single commit. Results are deduplicated by email address.
 
@@ -62,7 +62,7 @@ ln -s /usr/local/bin/aittributor .git/hooks/prepare-commit-msg
 
 ## Known limitations
 
-**Process detection is not always possible.** Agents may exit before the commit runs, or use process names that don't match (e.g. Electron-based desktop apps). When process scanning fails, aittributor falls back to agent session history, checking state files for recent activity in the same repo. This fallback only works for agents that write state files (currently Claude and Codex), and it cannot distinguish between an agent that wrote the code being committed and one that was only used for research. The result is a bias toward over-attribution, which is a deliberate tradeoff as undercounting real AI usage is harder to correct after the fact than occasional overcounting.
+**Process detection is not always possible.** Agents may exit before the commit runs, or use process names that don't match (e.g. Electron-based desktop apps). When process scanning fails, aittributor falls back to agent session history, checking state files for recent activity in the same repo. This fallback only works for agents that write scannable state files (currently Claude, Codex, Copilot CLI, and Pi); some agents, like OpenCode, store sessions in SQLite, which the breadcrumb scanner does not yet support. The fallback also cannot distinguish between an agent that wrote the code being committed and one that was only used for research. The result is a bias toward over-attribution, which is a deliberate tradeoff as undercounting real AI usage is harder to correct after the fact than occasional overcounting.
 
 **Agent-initiated commits are the most reliable.** Attribution is most accurate when the agent itself runs `git commit`. Manual commits while an agent session is open (or recently closed) are the main source of attribution that may not reflect actual code contribution.
 
diff --git a/src/agent.rs b/src/agent.rs
@@ -6,89 +6,111 @@ pub struct Agent {
     pub email: &'static str,
     pub breadcrumb_dir: Option<&'static str>,
     pub breadcrumb_ext: Option<&'static str>,
+    /// When true, process_names must match the basename exactly (not as a substring).
+    /// Use for short names like "pi" that would otherwise false-positive on "pipefail" etc.
+    pub exact_process_match: bool,
 }
 
 pub const KNOWN_AGENTS: &[Agent] = &[
     Agent {
         process_names: &["claude"],
-        env_vars: &[],
         email: "Claude Code <noreply@anthropic.com>",
         breadcrumb_dir: Some(".claude/projects"),
         breadcrumb_ext: Some("jsonl"),
+        ..Agent::default()
     },
     Agent {
         process_names: &["goose"],
-        env_vars: &[],
         email: "Goose <opensource@block.xyz>",
-        breadcrumb_dir: None,
-        breadcrumb_ext: None,
+        ..Agent::default()
     },
     Agent {
         process_names: &["cursor", "cursor-agent"],
-        env_vars: &[],
         email: "Cursor <cursoragent@cursor.com>",
-        breadcrumb_dir: None,
-        breadcrumb_ext: None,
+        ..Agent::default()
     },
     Agent {
         process_names: &["aider"],
-        env_vars: &[],
         email: "Aider <noreply@aider.chat>",
-        breadcrumb_dir: None,
-        breadcrumb_ext: None,
+        ..Agent::default()
     },
     Agent {
         process_names: &["windsurf"],
-        env_vars: &[],
         email: "Windsurf <noreply@codeium.com>",
-        breadcrumb_dir: None,
-        breadcrumb_ext: None,
+        ..Agent::default()
     },
     Agent {
         process_names: &["codex"],
-        env_vars: &[],
         email: "Codex <noreply@openai.com>",
         breadcrumb_dir: Some(".codex/sessions"),
         breadcrumb_ext: Some("jsonl"),
+        ..Agent::default()
     },
     Agent {
         process_names: &["copilot-agent"],
-        env_vars: &[],
         email: "GitHub Copilot <noreply@github.com>",
-        breadcrumb_dir: None,
-        breadcrumb_ext: None,
+        ..Agent::default()
+    },
+    // Copilot CLI is a separate terminal agent from the VS Code extension (copilot-agent above).
+    // Must appear after copilot-agent since find_by_name uses contains() and "copilot" would
+    // otherwise shadow the more specific "copilot-agent" match.
+    Agent {
+        process_names: &["copilot"],
+        email: "Copilot <223556219+Copilot@users.noreply.github.com>",
+        // Sessions stored as JSONL event logs in ~/.copilot/session-state/{session-id}/events.jsonl
+        breadcrumb_dir: Some(".copilot/session-state"),
+        breadcrumb_ext: Some("jsonl"),
+        ..Agent::default()
     },
     Agent {
         process_names: &["amazon-q"],
-        env_vars: &[],
         email: "Amazon Q Developer <noreply@amazon.com>",
-        breadcrumb_dir: None,
-        breadcrumb_ext: None,
+        ..Agent::default()
     },
     Agent {
         process_names: &["amp"],
-        env_vars: &[],
         email: "Amp <amp@ampcode.com>",
-        breadcrumb_dir: None,
-        breadcrumb_ext: None,
+        ..Agent::default()
     },
     Agent {
-        process_names: &[],
         env_vars: &[("CLINE_ACTIVE", "true")],
         email: "Cline <noreply@cline.bot>",
-        breadcrumb_dir: None,
-        breadcrumb_ext: None,
+        ..Agent::default()
     },
     Agent {
         process_names: &["gemini"],
-        env_vars: &[],
         email: "Gemini CLI Agent <gemini-cli-agent@google.com>",
-        breadcrumb_dir: None,
-        breadcrumb_ext: None,
+        ..Agent::default()
+    },
+    Agent {
+        process_names: &["pi"],
+        email: "Pi <noreply@pi.dev>",
+        breadcrumb_dir: Some(".pi/agent/sessions"),
+        breadcrumb_ext: Some("jsonl"),
+        exact_process_match: true,
+        ..Agent::default()
+    },
+    // TODO: OpenCode sessions are stored in SQLite (~/.local/share/opencode/opencode.db),
+    // not flat files. Breadcrumb scanning would require a new SQLite-based strategy.
+    Agent {
+        process_names: &["opencode"],
+        email: "opencode <noreply@opencode.ai>",
+        ..Agent::default()
     },
 ];
 
 impl Agent {
+    const fn default() -> Self {
+        Agent {
+            process_names: &[],
+            env_vars: &[],
+            email: "",
+            breadcrumb_dir: None,
+            breadcrumb_ext: None,
+            exact_process_match: false,
+        }
+    }
+
     /// Extract the bare email address from a "Name <addr>" string.
     /// e.g. "Claude Code <noreply@anthropic.com>" → "noreply@anthropic.com"
     pub fn extract_email_addr(email: &str) -> &str {
@@ -105,7 +127,14 @@ impl Agent {
         let basename_lower = basename.to_lowercase();
 
         KNOWN_AGENTS.iter().find(|agent| {
-            !agent.process_names.is_empty() && agent.process_names.iter().any(|&pn| basename_lower.contains(pn))
+            !agent.process_names.is_empty()
+                && agent.process_names.iter().any(|&pn| {
+                    if agent.exact_process_match {
+                        basename_lower == pn
+                    } else {
+                        basename_lower.contains(pn)
+                    }
+                })
         })
     }