From ce5078018228c166e56cf77b7e2b958167f497b3 Mon Sep 17 00:00:00 2001
From: Mickael Farina <farina.mickael@gmail.com>
Date: Fri, 22 May 2026 15:48:51 +0200
Subject: [PATCH] refactor(llm): migrate iMessage + Telegram call_llm onto
 codec_llm (A-12 bridges)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-3E-bridges. The two outbound bridges hand-rolled the same OpenAI
chat/completions text call; both now route through codec_llm.call.

Bridges WANT graceful degradation, so the default never-raise behavior is
exactly right: codec_llm.call returns "" on error/no-choices/timeout/empty,
which maps back to the bridge's None-on-failure contract via
`content if content else None`. enable_thinking=False is preserved by passing
llm_cfg["kwargs"] minus chat_template_kwargs; <think> stripping moves into
codec_llm. Auth parity (Bearer iff api_key) and the 1500/0.7 tuning are kept.

The vision sites in both bridges (llm_cfg["vision_url"]) are A-11 / codec_vision,
not this tranche — left untouched. Removed a now-dead `import re` in
codec_imessage (its only use was the inline <think> strip).

Tests: tests/test_llm_bridges.py (8 — each bridge returns content on success,
returns None on empty/failure (graceful-degradation contract), passes
base_url/model/api_key through and filters chat_template_kwargs out of
extra_kwargs; source invariants confirm codec_llm.call + only the vision
/chat/completions site remains). Full suite 1431 passing, 23 known-baseline
failures, zero new. Net -50 LOC. Zero net-new ruff. No skills/ touched.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 AGENTS.md                                  |   2 +-
 codec_imessage.py                          |  54 +++--------
 codec_telegram.py                          |  47 +++-------
 docs/PR3E-BRIDGES-DESIGN.md                |  54 +++++++++++
 docs/audits/PHASE-1-CODE-QUALITY.md        |   4 +-
 docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md |   1 +
 tests/test_llm_bridges.py                  | 100 +++++++++++++++++++++
 7 files changed, 183 insertions(+), 79 deletions(-)
 create mode 100644 docs/PR3E-BRIDGES-DESIGN.md
 create mode 100644 tests/test_llm_bridges.py
diff --git a/AGENTS.md b/AGENTS.md
index 83f5de1..5d69fd3 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -56,7 +56,7 @@ docs/                        API.md, MCP_HTTP_SETUP.md, CONTEXT_REPORT.md, desig
 
 Other engine modules (`codec_overlays`, `codec_metrics`, `codec_logging`, `codec_gdocs`, `codec_google_auth`, `codec_cdp`, `codec_llm_proxy`, `codec_retry`, `codec_alerts`, `codec_search`, `codec_textassist`, `codec_watcher`, `codec_watchdog`) are internal helpers — read them when you need them, but they're not part of the navigation surface for an agent making structural changes. (Keyboard handling — wake word, F13 toggle, F18 voice, double-tap — lives **inline in `codec.py`** in the `codec` PM2 process; the old standalone `codec_keyboard.py` was deleted as a dead duplicate per A-8.)
 
-**Canonical LLM + vision helpers (PR-3E, A-11/A-12).** `codec_vision.py` is the SINGLE source for screen-vision (`describe_sync` / `describe_async`, Gemini-flash → local-Qwen-VL fallback, config read live from `codec_config`) — used by `codec.py`, `codec_voice`, `codec_session`. `codec_llm.py` is the canonical chat/completions caller (`call()` + `strip_think`/`extract_content` — headers, Bearer auth, `enable_thinking`, `<think>` strip, `choices/reasoning` parse, retry+backoff, never-raises). NOTE: `codec_llm_proxy.py` is a priority *queue* (semaphore), NOT an HTTP caller — don't confuse the two. A-12 is migrating the ~45 inline `chat/completions` sites onto `codec_llm` in phased tranches. Done: `codec_llm.call()` (non-stream; + `raise_on_error=True` raising `codec_llm.LLMError` for fail-loud callers) + `stream()` (sync SSE generator, yields raw deltas, never-raises); migrated sites = codec.py voice-reply, `codec_session.qwen_call` + `qwen_stream`, `codec_compaction`, `codec_dictate`, `codec_textassist`, the regen script, and `codec_agent_plan`/`codec_agent_runner` `_qwen_chat` (adapter maps `LLMError` → their public `QwenUnavailableError`). Pending tranches: an async `astream()` for voice `_stream_qwen` + agents `Agent.run` (queue stays at the call site — `codec_llm` never owns the semaphore), dashboard (4 non-stream + the `[SKILL:…]` stream tag-machine, which keeps its own parser and consumes only `stream()`'s raw tokens), bridges, and a skills tranche.
+**Canonical LLM + vision helpers (PR-3E, A-11/A-12).** `codec_vision.py` is the SINGLE source for screen-vision (`describe_sync` / `describe_async`, Gemini-flash → local-Qwen-VL fallback, config read live from `codec_config`) — used by `codec.py`, `codec_voice`, `codec_session`. `codec_llm.py` is the canonical chat/completions caller (`call()` + `strip_think`/`extract_content` — headers, Bearer auth, `enable_thinking`, `<think>` strip, `choices/reasoning` parse, retry+backoff, never-raises). NOTE: `codec_llm_proxy.py` is a priority *queue* (semaphore), NOT an HTTP caller — don't confuse the two. A-12 is migrating the ~45 inline `chat/completions` sites onto `codec_llm` in phased tranches. Done: `codec_llm.call()` (non-stream; + `raise_on_error=True` raising `codec_llm.LLMError` for fail-loud callers) + `stream()` (sync SSE generator, yields raw deltas, never-raises); migrated sites = codec.py voice-reply, `codec_session.qwen_call` + `qwen_stream`, `codec_compaction`, `codec_dictate`, `codec_textassist`, the regen script, and `codec_agent_plan`/`codec_agent_runner` `_qwen_chat` (adapter maps `LLMError` → their public `QwenUnavailableError`), and the `codec_telegram`/`codec_imessage` `call_llm` text sites (default never-raise → `None`-contract preserved; their vision sites are A-11/codec_vision, not migrated here). Pending tranches: an async `astream()` for voice `_stream_qwen` + agents `Agent.run` (queue stays at the call site — `codec_llm` never owns the semaphore), dashboard (4 non-stream + the `[SKILL:…]` stream tag-machine, which keeps its own parser and consumes only `stream()`'s raw tokens), and a skills tranche.
 
 ## 3. Agent + Crew runtime
 
diff --git a/codec_imessage.py b/codec_imessage.py
index f9ae2bd..ce00b73 100644
--- a/codec_imessage.py
+++ b/codec_imessage.py
@@ -16,7 +16,6 @@
 """
 
 import os
-import re
 import json
 import time
 import sqlite3
@@ -323,47 +322,18 @@ def call_llm(text, sender, llm_cfg, conversation_history=None, system_prompt_ove
 
     messages.append({"role": "user", "content": text})
 
-    headers = {"Content-Type": "application/json"}
-    if llm_cfg["api_key"]:
-        headers["Authorization"] = f"Bearer {llm_cfg['api_key']}"
-
-    payload = {
-        "model": llm_cfg["model"],
-        "messages": messages,
-        "max_tokens": 1500,
-        "temperature": 0.7,
-        "stream": False,
-        "chat_template_kwargs": {"enable_thinking": False},
-    }
-    # Merge extra kwargs (but protect chat_template_kwargs)
-    payload.update({k: v for k, v in llm_cfg["kwargs"].items() if k != "chat_template_kwargs"})
-
-    try:
-        r = requests.post(
-            f"{llm_cfg['base_url']}/chat/completions",
-            json=payload,
-            headers=headers,
-            timeout=120,
-        )
-        data = r.json()
-
-        if "error" in data:
-            log.error(f"LLM error: {data['error']}")
-            return None
-        if "choices" not in data or not data["choices"]:
-            log.error(f"LLM no choices: {str(data)[:200]}")
-            return None
-
-        content = (data["choices"][0]["message"].get("content") or "").strip()
-        # Strip thinking tags
-        content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
-        return content if content else None
-    except requests.exceptions.Timeout:
-        log.error("LLM timeout")
-        return None
-    except Exception as e:
-        log.error(f"LLM call failed: {e}")
-        return None
+    # A-12 (PR-3E-bridges): canonical codec_llm.call. Never-raise -> "" on any
+    # failure (error/no-choices/timeout/empty); mapped back to the bridge's None
+    # contract for graceful degradation. codec_llm strips <think>; kwargs are
+    # passed minus chat_template_kwargs so enable_thinking=False is preserved.
+    import codec_llm
+    extra = {k: v for k, v in llm_cfg["kwargs"].items() if k != "chat_template_kwargs"}
+    content = codec_llm.call(
+        messages, base_url=llm_cfg["base_url"], model=llm_cfg["model"],
+        api_key=llm_cfg["api_key"], max_tokens=1500, temperature=0.7,
+        timeout=120, extra_kwargs=extra,
+    )
+    return content if content else None
 
 
 # ── Vision (image attachments) ──────────────────────────────────────────────
diff --git a/codec_telegram.py b/codec_telegram.py
index 1ad6719..1c733f5 100644
--- a/codec_telegram.py
+++ b/codec_telegram.py
@@ -463,41 +463,18 @@ def call_llm(text, llm_cfg, conversation_history=None, system_prompt_override=No
         messages.extend(conversation_history[-8:])
     messages.append({"role": "user", "content": text})
 
-    headers = {"Content-Type": "application/json"}
-    if llm_cfg["api_key"]:
-        headers["Authorization"] = f"Bearer {llm_cfg['api_key']}"
-
-    payload = {
-        "model": llm_cfg["model"],
-        "messages": messages,
-        "max_tokens": 1500,
-        "temperature": 0.7,
-        "stream": False,
-        "chat_template_kwargs": {"enable_thinking": False},
-    }
-    payload.update({k: v for k, v in llm_cfg["kwargs"].items() if k != "chat_template_kwargs"})
-
-    try:
-        r = requests.post(
-            f"{llm_cfg['base_url']}/chat/completions",
-            json=payload, headers=headers, timeout=120,
-        )
-        data = r.json()
-        if "error" in data:
-            log.error(f"LLM error: {data['error']}")
-            return None
-        if "choices" not in data or not data["choices"]:
-            log.error(f"LLM no choices: {str(data)[:200]}")
-            return None
-        content = (data["choices"][0]["message"].get("content") or "").strip()
-        content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
-        return content if content else None
-    except requests.exceptions.Timeout:
-        log.error("LLM timeout")
-        return None
-    except Exception as e:
-        log.error(f"LLM call failed: {e}")
-        return None
+    # A-12 (PR-3E-bridges): canonical codec_llm.call. Never-raise -> "" on any
+    # failure (error/no-choices/timeout/empty); mapped back to the bridge's None
+    # contract for graceful degradation. codec_llm strips <think>; kwargs are
+    # passed minus chat_template_kwargs so enable_thinking=False is preserved.
+    import codec_llm
+    extra = {k: v for k, v in llm_cfg["kwargs"].items() if k != "chat_template_kwargs"}
+    content = codec_llm.call(
+        messages, base_url=llm_cfg["base_url"], model=llm_cfg["model"],
+        api_key=llm_cfg["api_key"], max_tokens=1500, temperature=0.7,
+        timeout=120, extra_kwargs=extra,
+    )
+    return content if content else None
 
 
 # ── Vision (photo messages) ─────────────────────────────────────────────────
diff --git a/docs/PR3E-BRIDGES-DESIGN.md b/docs/PR3E-BRIDGES-DESIGN.md
new file mode 100644
index 0000000..a7889b8
--- /dev/null
+++ b/docs/PR3E-BRIDGES-DESIGN.md
@@ -0,0 +1,54 @@
+# PR-3E-bridges — A-12: migrate the iMessage + Telegram `call_llm` text sites (DESIGN)
+
+**Status:** IMPLEMENTED. Both `call_llm` text sites route through `codec_llm.call` (default never-raise → `None`-contract preserved; `chat_template_kwargs` filtered from kwargs). Removed a now-dead `import re` from `codec_imessage`. 8 new tests (`tests/test_llm_bridges.py`); full suite 1431 passing, zero new; zero net-new ruff.
+**Finding:** A-12 (continuation). The two outbound bridges hand-roll the same OpenAI `chat/completions` text call.
+**Wave:** 3. Smallest, lowest-risk tranche — never-raise is the *default* `codec_llm.call` behavior, so this is a near-mechanical swap.
+
+---
+
+## 1. The two sites (identical shape)
+
+- `codec_telegram.call_llm` (text site `:482`)
+- `codec_imessage.call_llm` (text site `:343`)
+
+Both build `messages` (system + last-8 history + user), set headers (Bearer iff `llm_cfg["api_key"]`), POST a payload (`max_tokens=1500`, `temperature=0.7`, `stream=False`, `chat_template_kwargs.enable_thinking=False`, plus `llm_cfg["kwargs"]` **minus** any `chat_template_kwargs`), then:
+- `{"error": …}` or no `choices` → log + **return `None`**
+- parse content, strip `<think>`, `return content if content else None`
+- `requests.exceptions.Timeout` → `None`; any other `Exception` → `None`
+
+**Contract: `call_llm` returns `None` on every failure/empty** — the caller degrades gracefully (no reply / fallback). This is the *opposite* of the 2c sites: bridges WANT silent best-effort, so `codec_llm.call`'s default never-raise is exactly right.
+
+The vision sites (`telegram:519`, `imessage:393`, via `llm_cfg["vision_url"]`) are **A-11** (codec_vision), NOT this tranche — left untouched.
+
+## 2. Migration (each site)
+
+Replace the headers + payload + `try/except` block with:
+```python
+import codec_llm
+extra = {k: v for k, v in llm_cfg["kwargs"].items() if k != "chat_template_kwargs"}
+content = codec_llm.call(
+    messages, base_url=llm_cfg["base_url"], model=llm_cfg["model"],
+    api_key=llm_cfg["api_key"], max_tokens=1500, temperature=0.7,
+    timeout=120, extra_kwargs=extra,
+)
+return content if content else None
+```
+- **`None`-contract preserved**: `codec_llm.call` returns `""` on error / no-choices / timeout / exception / empty → `content if content else None` → `None`. Exact parity for the caller.
+- **`enable_thinking=False` preserved**: `extra` strips `chat_template_kwargs` (same as the original) so the explicit flag in `_build_request` isn't overridden.
+- **`<think>` strip** now handled by `codec_llm`.
+- **Auth parity**: `api_key=llm_cfg["api_key"]` → Bearer iff non-empty (same as original).
+
+## 3. Behavior deltas (minor, documented)
+- **Logging granularity**: the original logged distinct messages ("LLM error" / "LLM no choices" / "LLM timeout" / "LLM call failed"); `codec_llm` logs one generic warning. Same observable behavior (`None` returned) — only the log string differs.
+- Everything else is byte-parity for the caller.
+
+## 4. Test plan
+- `tests/test_llm_bridges.py`: for each bridge — `call_llm` returns the content on success (monkeypatched `codec_llm.call`); returns **`None`** when `codec_llm.call` returns `""` (graceful-degradation contract); passes `base_url`/`model`/`api_key` through and `extra_kwargs` with `chat_template_kwargs` **filtered out**. Source invariants: each calls `codec_llm.call(`, and `/chat/completions` count drops to **1** (only the vision site remains).
+- Full suite: expect the 23 known-baseline failures, **zero new**. No `skills/` touched → no manifest regen.
+
+## 5. Risk + rollback
+- **Blast radius:** 2 functions in 2 outbound bridges. `None`-contract preserved → callers unaffected. Inbound stays PWA-only (unchanged).
+- **Rollback:** single-commit revert.
+
+## 6. After bridges — remaining A-12
+dashboard (4 non-stream + the `[SKILL:…]` stream tag-machine), voice `_stream_qwen` + agents `Agent.run` (async `astream()` + queue at the call site), skills tranche (translate/fact_extract/create_skill/skill_forge).
diff --git a/docs/audits/PHASE-1-CODE-QUALITY.md b/docs/audits/PHASE-1-CODE-QUALITY.md
index a2e58f7..b48564c 100644
--- a/docs/audits/PHASE-1-CODE-QUALITY.md
+++ b/docs/audits/PHASE-1-CODE-QUALITY.md
@@ -126,7 +126,9 @@ Both scan `SKILLS_DIR` independently, so a skill file is loaded twice in differe
 >
 > **Tranche 2 (PR-3E-2):** built the streaming keystone **`codec_llm.stream()`** (sync generator, yields raw deltas, never-raises) + extracted a shared `_build_request`; migrated `codec_session.qwen_stream`, `codec_compaction`, `codec_dictate`. Read-the-source moved `codec_textassist` + `scripts/regen_skill_descriptions` to **tranche 2c** (they have a raise-on-failure contract — never-raise would paste empty over the user's selection / write empty descriptions). Pinned by `tests/test_llm_stream.py`. See `docs/PR3E2-LLM-STREAM-TRANCHE2-DESIGN.md`.
 >
-> **Tranche 2c (PR-3E-2c):** added `codec_llm.LLMError` + `codec_llm.call(raise_on_error=True)` (raises on every non-success — non-200/exception/empty-200; default stays never-raise). Migrated the 4 fail-loud sites: `codec_textassist` + `scripts/regen_skill_descriptions` (LLMError propagates — fixes the destructive empty-paste / empty-description), and `codec_agent_plan._qwen_chat` + `codec_agent_runner._qwen_chat` via a thin adapter that maps `LLMError` → their public `QwenUnavailableError` (+ a parallel `_qwen_base()` helper). Pinned by `tests/test_llm_raise_mode.py` (109 agent tests still green). See `docs/PR3E2C-RAISE-MODE-DESIGN.md`. **Still pending (own PRs):** dashboard (4 non-stream + 1 stream tag-machine), voice `_stream_qwen` + agents `Agent.run` (async + queue → need `astream()`), bridges telegram/imessage, self_improve/watcher, skills tranche (translate/fact_extract/create_skill/skill_forge).
+> **Tranche 2c (PR-3E-2c):** added `codec_llm.LLMError` + `codec_llm.call(raise_on_error=True)` (raises on every non-success — non-200/exception/empty-200; default stays never-raise). Migrated the 4 fail-loud sites: `codec_textassist` + `scripts/regen_skill_descriptions` (LLMError propagates — fixes the destructive empty-paste / empty-description), and `codec_agent_plan._qwen_chat` + `codec_agent_runner._qwen_chat` via a thin adapter that maps `LLMError` → their public `QwenUnavailableError` (+ a parallel `_qwen_base()` helper). Pinned by `tests/test_llm_raise_mode.py` (109 agent tests still green). See `docs/PR3E2C-RAISE-MODE-DESIGN.md`.
+>
+> **Bridges (PR-3E-bridges):** migrated `codec_telegram.call_llm` + `codec_imessage.call_llm` text sites to `codec_llm.call` (default never-raise — bridges WANT graceful degradation; the `None`-on-failure contract is preserved via `content if content else None`, and `chat_template_kwargs` is filtered from `llm_cfg["kwargs"]`). Vision sites in both bridges are A-11 (codec_vision), left untouched. Pinned by `tests/test_llm_bridges.py`. See `docs/PR3E-BRIDGES-DESIGN.md`. **Still pending (own PRs):** dashboard (4 non-stream + 1 stream tag-machine), voice `_stream_qwen` + agents `Agent.run` (async + queue → need `astream()`), self_improve/watcher, skills tranche (translate/fact_extract/create_skill/skill_forge).
 **Location:** Sample sites: `codec.py:702`, `codec_dashboard.py:980,1076,1215`, `codec_voice.py:180,196,208,213`, `codec_session.py:215,278,307`, `codec_agents.py:51`, `codec_agent_plan.py:239`, `codec_agent_runner.py:148`, `codec_compaction.py:78`, `codec_self_improve.py:238`, `codec_telegram.py:471,508`, `codec_imessage.py:341,391`, `codec_textassist.py:33`, `codec_dictate.py:492`, `codec_watcher.py:86,182`. Total: 51 occurrences via `grep -rn "chat/completions"`.
 **Impact:** Each site repeats: headers build, `Authorization: Bearer {api_key}` formatting, `{Content-Type: application/json}`, payload assembly with `chat_template_kwargs.enable_thinking=False`, `<think>` stripping, `try/except` for `r.json()` shape (`choices[0].message.content` or `.reasoning` fallback). Many also re-implement streaming SSE parsing. When the Qwen-3.6 upgrade landed, this is exactly the kind of change that needs to be applied in 20+ places.
 **Recommended fix:** Add `codec_llm_proxy.call(messages, **kwargs)` and `codec_llm_proxy.stream(messages, **kwargs)` as the single canonical API (the module already exists at `codec_llm_proxy.py`, only 130 LOC, only used by codec_voice + codec_agents). Migrate all 51 sites over the course of the Phase 1 hardening. As a first step, just covering the 5 sites in codec.py + codec_dashboard.py + codec_session.py would remove ~80% of the most-edited duplication.
diff --git a/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md b/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md
index d8538af..ffdddcc 100644
--- a/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md
+++ b/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md
@@ -245,6 +245,7 @@ Mirror the Intake Phase 3 wave pattern. 7 waves planned; sizes are PR-counts, NO
 - PR-3E: A-11 + A-12 — unify vision + `chat/completions` ✅ (branch `fix/pr3e-llm-vision-dedup`, design-first per §11 → `docs/PR3E-LLM-VISION-DEDUP-DESIGN.md`; **Option 2** chosen by Mickael). **A-11 fully closed**: new `codec_vision.py` (sync+async, Gemini→local fallback, live config); all 3 consumers (codec.py/voice/session) delegate; session gains a Gemini fallback it lacked. **A-12 first tranche**: discovered `codec_llm_proxy` is a *queue*, not an HTTP caller — built genuinely-new `codec_llm.py` (`call()` + `strip_think`/`extract_content`, retry, never-raises) and migrated codec.py voice-reply chat + `codec_session.qwen_call`. **Deferred to phased follow-ons**: `qwen_stream` SSE (needs `codec_llm.stream()`) + ~40 remaining sites (dashboard/voice/agents/bridges/misc), each its own tranche. 19 tests (`tests/test_llm_vision_dedup.py`); full suite zero new failures.
 - PR-3E-2: A-12 tranche 2 ✅ (branch `fix/pr3-a12-tranche2-stream`, design-first → `docs/PR3E2-LLM-STREAM-TRANCHE2-DESIGN.md`; **Option 1** chosen). Built streaming keystone `codec_llm.stream()` (sync generator, raw deltas, never-raises) + shared `_build_request`; migrated `codec_session.qwen_stream` (proof) + non-streaming trivials `codec_compaction` + `codec_dictate`. Read-the-source moved `codec_textassist` + `regen_skill_descriptions` to **2c** (raise-on-failure contract — never-raise would paste empty over the user's selection / write empty descriptions). 14 tests (`tests/test_llm_stream.py`); zero net-new ruff; full suite 1409 passing, zero new failures. **Remaining A-12 tranches:** 2c (raise-mode: textassist/regen/agent_plan/agent_runner), bridges (telegram/imessage), dashboard (non-stream + the stream tag-machine), voice `_stream_qwen` + agents (async `astream()` + queue), skills tranche.
 - PR-3E-2c: A-12 tranche 2c ✅ (branch `fix/pr3-a12-tranche2c-raise-mode`, design-first → `docs/PR3E2C-RAISE-MODE-DESIGN.md`). Added `codec_llm.LLMError` + `codec_llm.call(raise_on_error=True)` (raises on every non-success path; default unchanged never-raise). Migrated the 4 fail-loud sites: `codec_textassist` (was pasting empty over the user's selection on failure) + `scripts/regen_skill_descriptions` (LLMError propagates like the old `raise_for_status`), and `codec_agent_plan`/`codec_agent_runner` `_qwen_chat` via an adapter mapping `LLMError` → their public `QwenUnavailableError` (+ parallel `_qwen_base()`). 14 tests (`tests/test_llm_raise_mode.py`); 109 agent tests still green; zero net-new ruff; full suite 1423 passing, zero new failures. **Remaining A-12:** bridges (telegram/imessage), dashboard (4 non-stream + stream tag-machine), voice `_stream_qwen` + agents `Agent.run` (async `astream()` + queue), skills tranche.
+- PR-3E-bridges: A-12 bridges ✅ (branch `fix/pr3-a12-bridges`, design-first → `docs/PR3E-BRIDGES-DESIGN.md`). Migrated `codec_telegram.call_llm` + `codec_imessage.call_llm` text sites to `codec_llm.call` (default never-raise — bridges want graceful degradation; `None`-on-failure contract preserved via `content if content else None`, `chat_template_kwargs` filtered from kwargs). Removed a dead `import re` in imessage. Vision sites left for A-11. 8 tests (`tests/test_llm_bridges.py`); zero net-new ruff; full suite 1431 passing, zero new failures. **Remaining A-12:** dashboard (4 non-stream + stream tag-machine), voice `_stream_qwen` + agents `Agent.run` (async `astream()` + queue), skills tranche.
 - PR-3F (optional, large): A-19 — bridge unification (iMessage + Telegram → `BridgeRouter`)
 - PR-3G: small misc ✅ (branch `fix/pr3g-small-misc-cleanup`) — closed A-9 (DISABLED overlay, ~90 LOC), A-10 (run_session_module, 33 LOC + orphan `import sys`), A-14 (close_session shadow import), A-18 (9 unused Pydantic models + dead typing import). A-13 (dashboard pattern blocker) verified **already closed by PR-2C**. 6 regression tests; zero net-new ruff (net −); full suite 1344 passing. **Deferred from this batch (each needs its own focused PR):** A-8 (codec_keyboard.py 398 LOC — verify-first delete-or-migrate decision), A-15 (config_version — additive migration feature touching `load_config`), A-20 (inline sqlite in the live dispatch path — reliability fix needing a CodecMemory method).
 - A-15: config schema versioning ✅ (branch `fix/pr3-a15-config-versioning`; `CONFIG_SCHEMA_VERSION=1` + migration ladder + idempotent atomic write-back in `load_config`; never creates-on-missing or overwrites-corrupt; 12 tests; zero net-new ruff; full suite 1356 passing).
diff --git a/tests/test_llm_bridges.py b/tests/test_llm_bridges.py
new file mode 100644
index 0000000..603fc65
--- /dev/null
+++ b/tests/test_llm_bridges.py
@@ -0,0 +1,100 @@
+"""Tests for PR-3E-bridges — A-12: migrate iMessage + Telegram call_llm.
+
+Both `call_llm` text sites now route through codec_llm.call (default never-raise).
+The bridge contract — return None on any failure/empty for graceful degradation
+— is preserved via `content if content else None`. `chat_template_kwargs` is
+filtered out of llm_cfg["kwargs"] so enable_thinking=False survives.
+
+Reference: docs/PR3E-BRIDGES-DESIGN.md.
+"""
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(REPO))
+
+import codec_llm  # noqa: E402
+
+
+def _cfg(api_key="", kwargs=None):
+    return {"base_url": "http://x/v1", "model": "m",
+            "api_key": api_key, "kwargs": kwargs or {}}
+
+
+# ── telegram ──────────────────────────────────────────────────────────────────
+
+
+def test_telegram_call_llm_returns_content(monkeypatch):
+    import codec_telegram
+    monkeypatch.setattr(codec_llm, "call", lambda *a, **k: "tg reply")
+    assert codec_telegram.call_llm("hi", _cfg()) == "tg reply"
+
+
+def test_telegram_call_llm_none_on_empty(monkeypatch):
+    import codec_telegram
+    monkeypatch.setattr(codec_llm, "call", lambda *a, **k: "")
+    assert codec_telegram.call_llm("hi", _cfg()) is None
+
+
+def test_telegram_call_llm_passes_params_and_filters_kwargs(monkeypatch):
+    import codec_telegram
+    cap = {}
+
+    def fake(messages, **k):
+        cap["messages"] = messages
+        cap.update(k)
+        return "ok"
+
+    monkeypatch.setattr(codec_llm, "call", fake)
+    codec_telegram.call_llm("hi", _cfg(api_key="K", kwargs={
+        "top_p": 0.9, "chat_template_kwargs": {"enable_thinking": True}}))
+    assert cap["base_url"] == "http://x/v1" and cap["model"] == "m"
+    assert cap["api_key"] == "K"
+    assert cap["extra_kwargs"] == {"top_p": 0.9}      # chat_template_kwargs filtered
+    assert cap["messages"][-1] == {"role": "user", "content": "hi"}
+
+
+# ── imessage (call_llm has a `sender` 2nd positional arg) ──────────────────────
+
+
+def test_imessage_call_llm_returns_content(monkeypatch):
+    import codec_imessage
+    monkeypatch.setattr(codec_llm, "call", lambda *a, **k: "im reply")
+    assert codec_imessage.call_llm("hi", "+1", _cfg()) == "im reply"
+
+
+def test_imessage_call_llm_none_on_empty(monkeypatch):
+    import codec_imessage
+    monkeypatch.setattr(codec_llm, "call", lambda *a, **k: "")
+    assert codec_imessage.call_llm("hi", "+1", _cfg()) is None
+
+
+def test_imessage_call_llm_filters_kwargs(monkeypatch):
+    import codec_imessage
+    cap = {}
+
+    def fake(messages, **k):
+        cap.update(k)
+        return "ok"
+
+    monkeypatch.setattr(codec_llm, "call", fake)
+    codec_imessage.call_llm("hi", "+1", _cfg(api_key="K", kwargs={
+        "top_p": 0.5, "chat_template_kwargs": {"enable_thinking": True}}))
+    assert cap["extra_kwargs"] == {"top_p": 0.5} and cap["api_key"] == "K"
+
+
+# ── source-level migration invariants ─────────────────────────────────────────
+
+
+def test_telegram_uses_codec_llm():
+    src = (REPO / "codec_telegram.py").read_text()
+    assert "codec_llm.call(" in src
+    assert src.count("/chat/completions") == 1   # only the vision site remains
+
+
+def test_imessage_uses_codec_llm():
+    src = (REPO / "codec_imessage.py").read_text()
+    assert "codec_llm.call(" in src
+    assert src.count("/chat/completions") == 1