From ce5078018228c166e56cf77b7e2b958167f497b3 Mon Sep 17 00:00:00 2001 From: Mickael Farina Date: Fri, 22 May 2026 15:48:51 +0200 Subject: [PATCH] refactor(llm): migrate iMessage + Telegram call_llm onto codec_llm (A-12 bridges) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-3E-bridges. The two outbound bridges hand-rolled the same OpenAI chat/completions text call; both now route through codec_llm.call. Bridges WANT graceful degradation, so the default never-raise behavior is exactly right: codec_llm.call returns "" on error/no-choices/timeout/empty, which maps back to the bridge's None-on-failure contract via `content if content else None`. enable_thinking=False is preserved by passing llm_cfg["kwargs"] minus chat_template_kwargs; stripping moves into codec_llm. Auth parity (Bearer iff api_key) and the 1500/0.7 tuning are kept. The vision sites in both bridges (llm_cfg["vision_url"]) are A-11 / codec_vision, not this tranche — left untouched. Removed a now-dead `import re` in codec_imessage (its only use was the inline strip). Tests: tests/test_llm_bridges.py (8 — each bridge returns content on success, returns None on empty/failure (graceful-degradation contract), passes base_url/model/api_key through and filters chat_template_kwargs out of extra_kwargs; source invariants confirm codec_llm.call + only the vision /chat/completions site remains). Full suite 1431 passing, 23 known-baseline failures, zero new. Net -50 LOC. Zero net-new ruff. No skills/ touched. Co-Authored-By: Claude Opus 4.7 --- AGENTS.md | 2 +- codec_imessage.py | 54 +++-------- codec_telegram.py | 47 +++------- docs/PR3E-BRIDGES-DESIGN.md | 54 +++++++++++ docs/audits/PHASE-1-CODE-QUALITY.md | 4 +- docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md | 1 + tests/test_llm_bridges.py | 100 +++++++++++++++++++++ 7 files changed, 183 insertions(+), 79 deletions(-) create mode 100644 docs/PR3E-BRIDGES-DESIGN.md create mode 100644 tests/test_llm_bridges.py diff --git a/AGENTS.md b/AGENTS.md index 83f5de1..5d69fd3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -56,7 +56,7 @@ docs/ API.md, MCP_HTTP_SETUP.md, CONTEXT_REPORT.md, desig Other engine modules (`codec_overlays`, `codec_metrics`, `codec_logging`, `codec_gdocs`, `codec_google_auth`, `codec_cdp`, `codec_llm_proxy`, `codec_retry`, `codec_alerts`, `codec_search`, `codec_textassist`, `codec_watcher`, `codec_watchdog`) are internal helpers — read them when you need them, but they're not part of the navigation surface for an agent making structural changes. (Keyboard handling — wake word, F13 toggle, F18 voice, double-tap — lives **inline in `codec.py`** in the `codec` PM2 process; the old standalone `codec_keyboard.py` was deleted as a dead duplicate per A-8.) -**Canonical LLM + vision helpers (PR-3E, A-11/A-12).** `codec_vision.py` is the SINGLE source for screen-vision (`describe_sync` / `describe_async`, Gemini-flash → local-Qwen-VL fallback, config read live from `codec_config`) — used by `codec.py`, `codec_voice`, `codec_session`. `codec_llm.py` is the canonical chat/completions caller (`call()` + `strip_think`/`extract_content` — headers, Bearer auth, `enable_thinking`, `` strip, `choices/reasoning` parse, retry+backoff, never-raises). NOTE: `codec_llm_proxy.py` is a priority *queue* (semaphore), NOT an HTTP caller — don't confuse the two. A-12 is migrating the ~45 inline `chat/completions` sites onto `codec_llm` in phased tranches. Done: `codec_llm.call()` (non-stream; + `raise_on_error=True` raising `codec_llm.LLMError` for fail-loud callers) + `stream()` (sync SSE generator, yields raw deltas, never-raises); migrated sites = codec.py voice-reply, `codec_session.qwen_call` + `qwen_stream`, `codec_compaction`, `codec_dictate`, `codec_textassist`, the regen script, and `codec_agent_plan`/`codec_agent_runner` `_qwen_chat` (adapter maps `LLMError` → their public `QwenUnavailableError`). Pending tranches: an async `astream()` for voice `_stream_qwen` + agents `Agent.run` (queue stays at the call site — `codec_llm` never owns the semaphore), dashboard (4 non-stream + the `[SKILL:…]` stream tag-machine, which keeps its own parser and consumes only `stream()`'s raw tokens), bridges, and a skills tranche. +**Canonical LLM + vision helpers (PR-3E, A-11/A-12).** `codec_vision.py` is the SINGLE source for screen-vision (`describe_sync` / `describe_async`, Gemini-flash → local-Qwen-VL fallback, config read live from `codec_config`) — used by `codec.py`, `codec_voice`, `codec_session`. `codec_llm.py` is the canonical chat/completions caller (`call()` + `strip_think`/`extract_content` — headers, Bearer auth, `enable_thinking`, `` strip, `choices/reasoning` parse, retry+backoff, never-raises). NOTE: `codec_llm_proxy.py` is a priority *queue* (semaphore), NOT an HTTP caller — don't confuse the two. A-12 is migrating the ~45 inline `chat/completions` sites onto `codec_llm` in phased tranches. Done: `codec_llm.call()` (non-stream; + `raise_on_error=True` raising `codec_llm.LLMError` for fail-loud callers) + `stream()` (sync SSE generator, yields raw deltas, never-raises); migrated sites = codec.py voice-reply, `codec_session.qwen_call` + `qwen_stream`, `codec_compaction`, `codec_dictate`, `codec_textassist`, the regen script, and `codec_agent_plan`/`codec_agent_runner` `_qwen_chat` (adapter maps `LLMError` → their public `QwenUnavailableError`), and the `codec_telegram`/`codec_imessage` `call_llm` text sites (default never-raise → `None`-contract preserved; their vision sites are A-11/codec_vision, not migrated here). Pending tranches: an async `astream()` for voice `_stream_qwen` + agents `Agent.run` (queue stays at the call site — `codec_llm` never owns the semaphore), dashboard (4 non-stream + the `[SKILL:…]` stream tag-machine, which keeps its own parser and consumes only `stream()`'s raw tokens), and a skills tranche. ## 3. Agent + Crew runtime diff --git a/codec_imessage.py b/codec_imessage.py index f9ae2bd..ce00b73 100644 --- a/codec_imessage.py +++ b/codec_imessage.py @@ -16,7 +16,6 @@ """ import os -import re import json import time import sqlite3 @@ -323,47 +322,18 @@ def call_llm(text, sender, llm_cfg, conversation_history=None, system_prompt_ove messages.append({"role": "user", "content": text}) - headers = {"Content-Type": "application/json"} - if llm_cfg["api_key"]: - headers["Authorization"] = f"Bearer {llm_cfg['api_key']}" - - payload = { - "model": llm_cfg["model"], - "messages": messages, - "max_tokens": 1500, - "temperature": 0.7, - "stream": False, - "chat_template_kwargs": {"enable_thinking": False}, - } - # Merge extra kwargs (but protect chat_template_kwargs) - payload.update({k: v for k, v in llm_cfg["kwargs"].items() if k != "chat_template_kwargs"}) - - try: - r = requests.post( - f"{llm_cfg['base_url']}/chat/completions", - json=payload, - headers=headers, - timeout=120, - ) - data = r.json() - - if "error" in data: - log.error(f"LLM error: {data['error']}") - return None - if "choices" not in data or not data["choices"]: - log.error(f"LLM no choices: {str(data)[:200]}") - return None - - content = (data["choices"][0]["message"].get("content") or "").strip() - # Strip thinking tags - content = re.sub(r'[\s\S]*?', '', content).strip() - return content if content else None - except requests.exceptions.Timeout: - log.error("LLM timeout") - return None - except Exception as e: - log.error(f"LLM call failed: {e}") - return None + # A-12 (PR-3E-bridges): canonical codec_llm.call. Never-raise -> "" on any + # failure (error/no-choices/timeout/empty); mapped back to the bridge's None + # contract for graceful degradation. codec_llm strips ; kwargs are + # passed minus chat_template_kwargs so enable_thinking=False is preserved. + import codec_llm + extra = {k: v for k, v in llm_cfg["kwargs"].items() if k != "chat_template_kwargs"} + content = codec_llm.call( + messages, base_url=llm_cfg["base_url"], model=llm_cfg["model"], + api_key=llm_cfg["api_key"], max_tokens=1500, temperature=0.7, + timeout=120, extra_kwargs=extra, + ) + return content if content else None # ── Vision (image attachments) ────────────────────────────────────────────── diff --git a/codec_telegram.py b/codec_telegram.py index 1ad6719..1c733f5 100644 --- a/codec_telegram.py +++ b/codec_telegram.py @@ -463,41 +463,18 @@ def call_llm(text, llm_cfg, conversation_history=None, system_prompt_override=No messages.extend(conversation_history[-8:]) messages.append({"role": "user", "content": text}) - headers = {"Content-Type": "application/json"} - if llm_cfg["api_key"]: - headers["Authorization"] = f"Bearer {llm_cfg['api_key']}" - - payload = { - "model": llm_cfg["model"], - "messages": messages, - "max_tokens": 1500, - "temperature": 0.7, - "stream": False, - "chat_template_kwargs": {"enable_thinking": False}, - } - payload.update({k: v for k, v in llm_cfg["kwargs"].items() if k != "chat_template_kwargs"}) - - try: - r = requests.post( - f"{llm_cfg['base_url']}/chat/completions", - json=payload, headers=headers, timeout=120, - ) - data = r.json() - if "error" in data: - log.error(f"LLM error: {data['error']}") - return None - if "choices" not in data or not data["choices"]: - log.error(f"LLM no choices: {str(data)[:200]}") - return None - content = (data["choices"][0]["message"].get("content") or "").strip() - content = re.sub(r'[\s\S]*?', '', content).strip() - return content if content else None - except requests.exceptions.Timeout: - log.error("LLM timeout") - return None - except Exception as e: - log.error(f"LLM call failed: {e}") - return None + # A-12 (PR-3E-bridges): canonical codec_llm.call. Never-raise -> "" on any + # failure (error/no-choices/timeout/empty); mapped back to the bridge's None + # contract for graceful degradation. codec_llm strips ; kwargs are + # passed minus chat_template_kwargs so enable_thinking=False is preserved. + import codec_llm + extra = {k: v for k, v in llm_cfg["kwargs"].items() if k != "chat_template_kwargs"} + content = codec_llm.call( + messages, base_url=llm_cfg["base_url"], model=llm_cfg["model"], + api_key=llm_cfg["api_key"], max_tokens=1500, temperature=0.7, + timeout=120, extra_kwargs=extra, + ) + return content if content else None # ── Vision (photo messages) ───────────────────────────────────────────────── diff --git a/docs/PR3E-BRIDGES-DESIGN.md b/docs/PR3E-BRIDGES-DESIGN.md new file mode 100644 index 0000000..a7889b8 --- /dev/null +++ b/docs/PR3E-BRIDGES-DESIGN.md @@ -0,0 +1,54 @@ +# PR-3E-bridges — A-12: migrate the iMessage + Telegram `call_llm` text sites (DESIGN) + +**Status:** IMPLEMENTED. Both `call_llm` text sites route through `codec_llm.call` (default never-raise → `None`-contract preserved; `chat_template_kwargs` filtered from kwargs). Removed a now-dead `import re` from `codec_imessage`. 8 new tests (`tests/test_llm_bridges.py`); full suite 1431 passing, zero new; zero net-new ruff. +**Finding:** A-12 (continuation). The two outbound bridges hand-roll the same OpenAI `chat/completions` text call. +**Wave:** 3. Smallest, lowest-risk tranche — never-raise is the *default* `codec_llm.call` behavior, so this is a near-mechanical swap. + +--- + +## 1. The two sites (identical shape) + +- `codec_telegram.call_llm` (text site `:482`) +- `codec_imessage.call_llm` (text site `:343`) + +Both build `messages` (system + last-8 history + user), set headers (Bearer iff `llm_cfg["api_key"]`), POST a payload (`max_tokens=1500`, `temperature=0.7`, `stream=False`, `chat_template_kwargs.enable_thinking=False`, plus `llm_cfg["kwargs"]` **minus** any `chat_template_kwargs`), then: +- `{"error": …}` or no `choices` → log + **return `None`** +- parse content, strip ``, `return content if content else None` +- `requests.exceptions.Timeout` → `None`; any other `Exception` → `None` + +**Contract: `call_llm` returns `None` on every failure/empty** — the caller degrades gracefully (no reply / fallback). This is the *opposite* of the 2c sites: bridges WANT silent best-effort, so `codec_llm.call`'s default never-raise is exactly right. + +The vision sites (`telegram:519`, `imessage:393`, via `llm_cfg["vision_url"]`) are **A-11** (codec_vision), NOT this tranche — left untouched. + +## 2. Migration (each site) + +Replace the headers + payload + `try/except` block with: +```python +import codec_llm +extra = {k: v for k, v in llm_cfg["kwargs"].items() if k != "chat_template_kwargs"} +content = codec_llm.call( + messages, base_url=llm_cfg["base_url"], model=llm_cfg["model"], + api_key=llm_cfg["api_key"], max_tokens=1500, temperature=0.7, + timeout=120, extra_kwargs=extra, +) +return content if content else None +``` +- **`None`-contract preserved**: `codec_llm.call` returns `""` on error / no-choices / timeout / exception / empty → `content if content else None` → `None`. Exact parity for the caller. +- **`enable_thinking=False` preserved**: `extra` strips `chat_template_kwargs` (same as the original) so the explicit flag in `_build_request` isn't overridden. +- **`` strip** now handled by `codec_llm`. +- **Auth parity**: `api_key=llm_cfg["api_key"]` → Bearer iff non-empty (same as original). + +## 3. Behavior deltas (minor, documented) +- **Logging granularity**: the original logged distinct messages ("LLM error" / "LLM no choices" / "LLM timeout" / "LLM call failed"); `codec_llm` logs one generic warning. Same observable behavior (`None` returned) — only the log string differs. +- Everything else is byte-parity for the caller. + +## 4. Test plan +- `tests/test_llm_bridges.py`: for each bridge — `call_llm` returns the content on success (monkeypatched `codec_llm.call`); returns **`None`** when `codec_llm.call` returns `""` (graceful-degradation contract); passes `base_url`/`model`/`api_key` through and `extra_kwargs` with `chat_template_kwargs` **filtered out**. Source invariants: each calls `codec_llm.call(`, and `/chat/completions` count drops to **1** (only the vision site remains). +- Full suite: expect the 23 known-baseline failures, **zero new**. No `skills/` touched → no manifest regen. + +## 5. Risk + rollback +- **Blast radius:** 2 functions in 2 outbound bridges. `None`-contract preserved → callers unaffected. Inbound stays PWA-only (unchanged). +- **Rollback:** single-commit revert. + +## 6. After bridges — remaining A-12 +dashboard (4 non-stream + the `[SKILL:…]` stream tag-machine), voice `_stream_qwen` + agents `Agent.run` (async `astream()` + queue at the call site), skills tranche (translate/fact_extract/create_skill/skill_forge). diff --git a/docs/audits/PHASE-1-CODE-QUALITY.md b/docs/audits/PHASE-1-CODE-QUALITY.md index a2e58f7..b48564c 100644 --- a/docs/audits/PHASE-1-CODE-QUALITY.md +++ b/docs/audits/PHASE-1-CODE-QUALITY.md @@ -126,7 +126,9 @@ Both scan `SKILLS_DIR` independently, so a skill file is loaded twice in differe > > **Tranche 2 (PR-3E-2):** built the streaming keystone **`codec_llm.stream()`** (sync generator, yields raw deltas, never-raises) + extracted a shared `_build_request`; migrated `codec_session.qwen_stream`, `codec_compaction`, `codec_dictate`. Read-the-source moved `codec_textassist` + `scripts/regen_skill_descriptions` to **tranche 2c** (they have a raise-on-failure contract — never-raise would paste empty over the user's selection / write empty descriptions). Pinned by `tests/test_llm_stream.py`. See `docs/PR3E2-LLM-STREAM-TRANCHE2-DESIGN.md`. > -> **Tranche 2c (PR-3E-2c):** added `codec_llm.LLMError` + `codec_llm.call(raise_on_error=True)` (raises on every non-success — non-200/exception/empty-200; default stays never-raise). Migrated the 4 fail-loud sites: `codec_textassist` + `scripts/regen_skill_descriptions` (LLMError propagates — fixes the destructive empty-paste / empty-description), and `codec_agent_plan._qwen_chat` + `codec_agent_runner._qwen_chat` via a thin adapter that maps `LLMError` → their public `QwenUnavailableError` (+ a parallel `_qwen_base()` helper). Pinned by `tests/test_llm_raise_mode.py` (109 agent tests still green). See `docs/PR3E2C-RAISE-MODE-DESIGN.md`. **Still pending (own PRs):** dashboard (4 non-stream + 1 stream tag-machine), voice `_stream_qwen` + agents `Agent.run` (async + queue → need `astream()`), bridges telegram/imessage, self_improve/watcher, skills tranche (translate/fact_extract/create_skill/skill_forge). +> **Tranche 2c (PR-3E-2c):** added `codec_llm.LLMError` + `codec_llm.call(raise_on_error=True)` (raises on every non-success — non-200/exception/empty-200; default stays never-raise). Migrated the 4 fail-loud sites: `codec_textassist` + `scripts/regen_skill_descriptions` (LLMError propagates — fixes the destructive empty-paste / empty-description), and `codec_agent_plan._qwen_chat` + `codec_agent_runner._qwen_chat` via a thin adapter that maps `LLMError` → their public `QwenUnavailableError` (+ a parallel `_qwen_base()` helper). Pinned by `tests/test_llm_raise_mode.py` (109 agent tests still green). See `docs/PR3E2C-RAISE-MODE-DESIGN.md`. +> +> **Bridges (PR-3E-bridges):** migrated `codec_telegram.call_llm` + `codec_imessage.call_llm` text sites to `codec_llm.call` (default never-raise — bridges WANT graceful degradation; the `None`-on-failure contract is preserved via `content if content else None`, and `chat_template_kwargs` is filtered from `llm_cfg["kwargs"]`). Vision sites in both bridges are A-11 (codec_vision), left untouched. Pinned by `tests/test_llm_bridges.py`. See `docs/PR3E-BRIDGES-DESIGN.md`. **Still pending (own PRs):** dashboard (4 non-stream + 1 stream tag-machine), voice `_stream_qwen` + agents `Agent.run` (async + queue → need `astream()`), self_improve/watcher, skills tranche (translate/fact_extract/create_skill/skill_forge). **Location:** Sample sites: `codec.py:702`, `codec_dashboard.py:980,1076,1215`, `codec_voice.py:180,196,208,213`, `codec_session.py:215,278,307`, `codec_agents.py:51`, `codec_agent_plan.py:239`, `codec_agent_runner.py:148`, `codec_compaction.py:78`, `codec_self_improve.py:238`, `codec_telegram.py:471,508`, `codec_imessage.py:341,391`, `codec_textassist.py:33`, `codec_dictate.py:492`, `codec_watcher.py:86,182`. Total: 51 occurrences via `grep -rn "chat/completions"`. **Impact:** Each site repeats: headers build, `Authorization: Bearer {api_key}` formatting, `{Content-Type: application/json}`, payload assembly with `chat_template_kwargs.enable_thinking=False`, `` stripping, `try/except` for `r.json()` shape (`choices[0].message.content` or `.reasoning` fallback). Many also re-implement streaming SSE parsing. When the Qwen-3.6 upgrade landed, this is exactly the kind of change that needs to be applied in 20+ places. **Recommended fix:** Add `codec_llm_proxy.call(messages, **kwargs)` and `codec_llm_proxy.stream(messages, **kwargs)` as the single canonical API (the module already exists at `codec_llm_proxy.py`, only 130 LOC, only used by codec_voice + codec_agents). Migrate all 51 sites over the course of the Phase 1 hardening. As a first step, just covering the 5 sites in codec.py + codec_dashboard.py + codec_session.py would remove ~80% of the most-edited duplication. diff --git a/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md b/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md index d8538af..ffdddcc 100644 --- a/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md +++ b/docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md @@ -245,6 +245,7 @@ Mirror the Intake Phase 3 wave pattern. 7 waves planned; sizes are PR-counts, NO - PR-3E: A-11 + A-12 — unify vision + `chat/completions` ✅ (branch `fix/pr3e-llm-vision-dedup`, design-first per §11 → `docs/PR3E-LLM-VISION-DEDUP-DESIGN.md`; **Option 2** chosen by Mickael). **A-11 fully closed**: new `codec_vision.py` (sync+async, Gemini→local fallback, live config); all 3 consumers (codec.py/voice/session) delegate; session gains a Gemini fallback it lacked. **A-12 first tranche**: discovered `codec_llm_proxy` is a *queue*, not an HTTP caller — built genuinely-new `codec_llm.py` (`call()` + `strip_think`/`extract_content`, retry, never-raises) and migrated codec.py voice-reply chat + `codec_session.qwen_call`. **Deferred to phased follow-ons**: `qwen_stream` SSE (needs `codec_llm.stream()`) + ~40 remaining sites (dashboard/voice/agents/bridges/misc), each its own tranche. 19 tests (`tests/test_llm_vision_dedup.py`); full suite zero new failures. - PR-3E-2: A-12 tranche 2 ✅ (branch `fix/pr3-a12-tranche2-stream`, design-first → `docs/PR3E2-LLM-STREAM-TRANCHE2-DESIGN.md`; **Option 1** chosen). Built streaming keystone `codec_llm.stream()` (sync generator, raw deltas, never-raises) + shared `_build_request`; migrated `codec_session.qwen_stream` (proof) + non-streaming trivials `codec_compaction` + `codec_dictate`. Read-the-source moved `codec_textassist` + `regen_skill_descriptions` to **2c** (raise-on-failure contract — never-raise would paste empty over the user's selection / write empty descriptions). 14 tests (`tests/test_llm_stream.py`); zero net-new ruff; full suite 1409 passing, zero new failures. **Remaining A-12 tranches:** 2c (raise-mode: textassist/regen/agent_plan/agent_runner), bridges (telegram/imessage), dashboard (non-stream + the stream tag-machine), voice `_stream_qwen` + agents (async `astream()` + queue), skills tranche. - PR-3E-2c: A-12 tranche 2c ✅ (branch `fix/pr3-a12-tranche2c-raise-mode`, design-first → `docs/PR3E2C-RAISE-MODE-DESIGN.md`). Added `codec_llm.LLMError` + `codec_llm.call(raise_on_error=True)` (raises on every non-success path; default unchanged never-raise). Migrated the 4 fail-loud sites: `codec_textassist` (was pasting empty over the user's selection on failure) + `scripts/regen_skill_descriptions` (LLMError propagates like the old `raise_for_status`), and `codec_agent_plan`/`codec_agent_runner` `_qwen_chat` via an adapter mapping `LLMError` → their public `QwenUnavailableError` (+ parallel `_qwen_base()`). 14 tests (`tests/test_llm_raise_mode.py`); 109 agent tests still green; zero net-new ruff; full suite 1423 passing, zero new failures. **Remaining A-12:** bridges (telegram/imessage), dashboard (4 non-stream + stream tag-machine), voice `_stream_qwen` + agents `Agent.run` (async `astream()` + queue), skills tranche. +- PR-3E-bridges: A-12 bridges ✅ (branch `fix/pr3-a12-bridges`, design-first → `docs/PR3E-BRIDGES-DESIGN.md`). Migrated `codec_telegram.call_llm` + `codec_imessage.call_llm` text sites to `codec_llm.call` (default never-raise — bridges want graceful degradation; `None`-on-failure contract preserved via `content if content else None`, `chat_template_kwargs` filtered from kwargs). Removed a dead `import re` in imessage. Vision sites left for A-11. 8 tests (`tests/test_llm_bridges.py`); zero net-new ruff; full suite 1431 passing, zero new failures. **Remaining A-12:** dashboard (4 non-stream + stream tag-machine), voice `_stream_qwen` + agents `Agent.run` (async `astream()` + queue), skills tranche. - PR-3F (optional, large): A-19 — bridge unification (iMessage + Telegram → `BridgeRouter`) - PR-3G: small misc ✅ (branch `fix/pr3g-small-misc-cleanup`) — closed A-9 (DISABLED overlay, ~90 LOC), A-10 (run_session_module, 33 LOC + orphan `import sys`), A-14 (close_session shadow import), A-18 (9 unused Pydantic models + dead typing import). A-13 (dashboard pattern blocker) verified **already closed by PR-2C**. 6 regression tests; zero net-new ruff (net −); full suite 1344 passing. **Deferred from this batch (each needs its own focused PR):** A-8 (codec_keyboard.py 398 LOC — verify-first delete-or-migrate decision), A-15 (config_version — additive migration feature touching `load_config`), A-20 (inline sqlite in the live dispatch path — reliability fix needing a CodecMemory method). - A-15: config schema versioning ✅ (branch `fix/pr3-a15-config-versioning`; `CONFIG_SCHEMA_VERSION=1` + migration ladder + idempotent atomic write-back in `load_config`; never creates-on-missing or overwrites-corrupt; 12 tests; zero net-new ruff; full suite 1356 passing). diff --git a/tests/test_llm_bridges.py b/tests/test_llm_bridges.py new file mode 100644 index 0000000..603fc65 --- /dev/null +++ b/tests/test_llm_bridges.py @@ -0,0 +1,100 @@ +"""Tests for PR-3E-bridges — A-12: migrate iMessage + Telegram call_llm. + +Both `call_llm` text sites now route through codec_llm.call (default never-raise). +The bridge contract — return None on any failure/empty for graceful degradation +— is preserved via `content if content else None`. `chat_template_kwargs` is +filtered out of llm_cfg["kwargs"] so enable_thinking=False survives. + +Reference: docs/PR3E-BRIDGES-DESIGN.md. +""" +from __future__ import annotations + +import sys +from pathlib import Path + +REPO = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO)) + +import codec_llm # noqa: E402 + + +def _cfg(api_key="", kwargs=None): + return {"base_url": "http://x/v1", "model": "m", + "api_key": api_key, "kwargs": kwargs or {}} + + +# ── telegram ────────────────────────────────────────────────────────────────── + + +def test_telegram_call_llm_returns_content(monkeypatch): + import codec_telegram + monkeypatch.setattr(codec_llm, "call", lambda *a, **k: "tg reply") + assert codec_telegram.call_llm("hi", _cfg()) == "tg reply" + + +def test_telegram_call_llm_none_on_empty(monkeypatch): + import codec_telegram + monkeypatch.setattr(codec_llm, "call", lambda *a, **k: "") + assert codec_telegram.call_llm("hi", _cfg()) is None + + +def test_telegram_call_llm_passes_params_and_filters_kwargs(monkeypatch): + import codec_telegram + cap = {} + + def fake(messages, **k): + cap["messages"] = messages + cap.update(k) + return "ok" + + monkeypatch.setattr(codec_llm, "call", fake) + codec_telegram.call_llm("hi", _cfg(api_key="K", kwargs={ + "top_p": 0.9, "chat_template_kwargs": {"enable_thinking": True}})) + assert cap["base_url"] == "http://x/v1" and cap["model"] == "m" + assert cap["api_key"] == "K" + assert cap["extra_kwargs"] == {"top_p": 0.9} # chat_template_kwargs filtered + assert cap["messages"][-1] == {"role": "user", "content": "hi"} + + +# ── imessage (call_llm has a `sender` 2nd positional arg) ────────────────────── + + +def test_imessage_call_llm_returns_content(monkeypatch): + import codec_imessage + monkeypatch.setattr(codec_llm, "call", lambda *a, **k: "im reply") + assert codec_imessage.call_llm("hi", "+1", _cfg()) == "im reply" + + +def test_imessage_call_llm_none_on_empty(monkeypatch): + import codec_imessage + monkeypatch.setattr(codec_llm, "call", lambda *a, **k: "") + assert codec_imessage.call_llm("hi", "+1", _cfg()) is None + + +def test_imessage_call_llm_filters_kwargs(monkeypatch): + import codec_imessage + cap = {} + + def fake(messages, **k): + cap.update(k) + return "ok" + + monkeypatch.setattr(codec_llm, "call", fake) + codec_imessage.call_llm("hi", "+1", _cfg(api_key="K", kwargs={ + "top_p": 0.5, "chat_template_kwargs": {"enable_thinking": True}})) + assert cap["extra_kwargs"] == {"top_p": 0.5} and cap["api_key"] == "K" + + +# ── source-level migration invariants ───────────────────────────────────────── + + +def test_telegram_uses_codec_llm(): + src = (REPO / "codec_telegram.py").read_text() + assert "codec_llm.call(" in src + assert src.count("/chat/completions") == 1 # only the vision site remains + + +def test_imessage_uses_codec_llm(): + src = (REPO / "codec_imessage.py").read_text() + assert "codec_llm.call(" in src + assert src.count("/chat/completions") == 1