Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
signal.signal(signal.SIGINT, lambda *a: None)
signal.signal(signal.SIGTERM, lambda *a: None)
"""CODEC v2.1 | F13=on/off | F18=voice | F16=text | *=screenshot | +=doc | Wake word"""
import logging, threading, tempfile, subprocess, os, time, sqlite3, json, re, base64, shutil
import logging, threading, tempfile, subprocess, os, time, json, re, base64, shutil
from datetime import datetime
from pynput import keyboard

Expand All @@ -24,7 +24,7 @@
cfg as _cfg,
QWEN_BASE_URL, QWEN_MODEL, LLM_API_KEY, LLM_KWARGS, QWEN_VISION_URL, QWEN_VISION_MODEL,
WHISPER_URL,
DB_PATH, TASK_QUEUE_FILE, DRAFT_TASK_FILE, SESSION_ALIVE, STREAMING, WAKE_WORD, WAKE_ENERGY, WAKE_CHUNK_SEC,
TASK_QUEUE_FILE, DRAFT_TASK_FILE, SESSION_ALIVE, STREAMING, WAKE_WORD, WAKE_ENERGY, WAKE_CHUNK_SEC,
WAKE_PHRASES,
get_gemini_api_key,
)
Expand Down Expand Up @@ -71,7 +71,7 @@ def _is_wake_utterance(text: str) -> bool:
# ─��� SHARED (from codec_core.py — single source of truth) ─────────────────────
import codec_core as _core
from codec_core import (
strip_think, is_draft, init_db, save_task, get_memory, get_recent_conversations,
strip_think, is_draft, init_db, save_task, update_session_response, get_memory, get_recent_conversations,
loaded_skills, load_skills, run_skill,
transcribe, speak_text, focused_app, get_text_dialog,
terminal_session_exists,
Expand Down Expand Up @@ -473,13 +473,11 @@ def _post_skill_screenshot():
# Add assistant response to session history
voice_session["messages"].append({"role": "assistant", "content": answer})
voice_session["turn_count"] += 1
# Save response to DB
try:
c = sqlite3.connect(DB_PATH)
c.execute("UPDATE sessions SET response=? WHERE id=?", (answer[:500], rid))
c.commit(); c.close()
except Exception as e:
log.warning(f"[CODEC] DB save failed: {e}")
# Save response to DB (A-20: via codec_core helper with
# WAL + busy_timeout — replaces the inline lock-prone
# sqlite3.connect that risked "database is locked" under
# concurrent agent-runner + voice writes). Never raises.
update_session_response(rid, answer[:500])
# Save to shared memory (same store as Chat)
try:
cm = CodecMemory()
Expand Down
48 changes: 44 additions & 4 deletions codec_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,26 @@ def is_draft(t):
def needs_screen(t): return any(k in t.lower() for k in SCREEN_KEYWORDS)

# ── MEMORY ────────────────────────────────────────────────────────────────────
def init_db():
def _db_connect():
"""Open a connection to the CODEC DB with WAL + busy_timeout (A-20).

Before this, codec_core's session helpers used a bare
`sqlite3.connect(DB_PATH)` with no pragmas — under concurrent writes
(Phase 3 agent runner + voice handler) that risks `database is locked`.
WAL lets readers and a writer coexist; busy_timeout=5000 makes a
contended writer wait up to 5s instead of erroring immediately. Matches
`codec_memory.CodecMemory` and `routes/_shared.get_db()`."""
c = sqlite3.connect(DB_PATH)
try:
c.execute("PRAGMA journal_mode=WAL")
c.execute("PRAGMA busy_timeout=5000")
except sqlite3.Error:
pass # pragma failures shouldn't break the connection
return c


def init_db():
c = _db_connect()
c.execute("CREATE TABLE IF NOT EXISTS sessions (id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp TEXT, task TEXT, app TEXT, response TEXT, user_id TEXT DEFAULT 'default')")
c.execute("CREATE TABLE IF NOT EXISTS conversations (id INTEGER PRIMARY KEY AUTOINCREMENT, session_id TEXT, timestamp TEXT, role TEXT, content TEXT, user_id TEXT DEFAULT 'default')")
c.execute("CREATE TABLE IF NOT EXISTS corrections (id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp TEXT, original TEXT, corrected TEXT, context TEXT, user_id TEXT DEFAULT 'default')")
Expand All @@ -72,13 +90,35 @@ def init_db():
c.commit(); c.close()

def save_task(task, app, user_id="default"):
c = sqlite3.connect(DB_PATH)
c = _db_connect()
cur = c.execute("INSERT INTO sessions (timestamp,task,app,response,user_id) VALUES (?,?,?,?,?)", (datetime.now().isoformat(), task[:200], app, "", user_id))
rid = cur.lastrowid; c.commit(); c.close(); return rid


def update_session_response(rid, response, user_id="default"):
"""Set the `response` column for a session row (A-20).

Replaces the inline `sqlite3.connect(DB_PATH); UPDATE sessions ...` block
that codec.py's voice handler used — that bypassed the WAL/busy_timeout
setup and risked `database is locked` under concurrent writes. `response`
is truncated to 500 chars to match the prior inline behavior. Never
raises — DB write failures are logged, not propagated (the caller already
spoke the answer; a failed log-write must not crash the turn)."""
if rid is None:
return False
try:
c = _db_connect()
c.execute("UPDATE sessions SET response=? WHERE id=?", (str(response)[:500], rid))
c.commit()
c.close()
return True
except sqlite3.Error as e:
log.warning("update_session_response failed for id=%s: %s", rid, e)
return False

def get_memory(n=5, user_id=None):
try:
c = sqlite3.connect(DB_PATH)
c = _db_connect()
if user_id is not None:
rows = c.execute("SELECT timestamp,task,app,response FROM sessions WHERE user_id=? ORDER BY id DESC LIMIT ?", (user_id, n)).fetchall()
else:
Expand All @@ -96,7 +136,7 @@ def get_memory(n=5, user_id=None):

def get_recent_conversations(n=10, user_id=None):
try:
c = sqlite3.connect(DB_PATH)
c = _db_connect()
if user_id is not None:
rows = c.execute("SELECT role, content FROM conversations WHERE user_id=? ORDER BY id DESC LIMIT ?", (user_id, n)).fetchall()
else:
Expand Down
2 changes: 2 additions & 0 deletions docs/audits/PHASE-1-CODE-QUALITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ Both scan `SKILLS_DIR` independently, so a skill file is loaded twice in differe

### A-20 — Inline `import sqlite3` + raw `c = sqlite3.connect(DB_PATH)` for one-off UPDATE bypasses CodecMemory [MEDIUM]
**Location:** `codec.py:716-721` (the inline UPDATE) plus `codec_dashboard.py:1408` (qchat_db lazy global) plus `codec_dashboard.py:1714` (vibe_db lazy global) plus dozens of `c.execute(...)` direct SQL elsewhere.

> **Closed by PR-3 A-20** (branch `fix/pr3-a20-inline-sqlite`). **Correction over the audit's literal fix:** the `sessions` table is owned by `codec_core` (not `CodecMemory` — that owns `conversations`), so the new method lives there. Added `codec_core._db_connect()` (sets `PRAGMA journal_mode=WAL` + `PRAGMA busy_timeout=5000`, matching `CodecMemory` / `routes/_shared.get_db()`) and `codec_core.update_session_response(rid, response)` (truncates to 500 chars, never raises). Replaced codec.py's inline `sqlite3.connect(DB_PATH); UPDATE sessions SET response=? WHERE id=?` with the helper call; removed the now-unused `sqlite3` + `DB_PATH` imports from codec.py. **Root-cause fix beyond the one site:** retrofitted ALL four real codec_core session connects (`init_db`, `save_task`, `get_memory`, `get_recent_conversations`) to use `_db_connect()` — they had the same no-pragma lock risk. So under concurrent Phase-3 agent-runner + voice writes, the `sessions` table now gets WAL + a 5s busy_timeout instead of an immediate `database is locked`. (The dashboard `qchat_db`/`vibe_db` lazy globals already set pragmas — left as-is per the audit. The string-template connects inside the deprecated `build_session_script` are untouched.) 9 tests in `tests/test_session_response_update.py`.
**Description:** `codec.py:_dispatch_inner` opens a fresh sqlite3 connection, does ONE `UPDATE sessions SET response=? WHERE id=?`, commits, closes. No WAL pragma, no busy_timeout. The very next try-block (lines 723-728) does use `CodecMemory()` properly. So the same handler mixes raw SQL and the abstraction. (P-3 refuted: the SQL is `WHERE id=?` not `WHERE task=? AND app=? ORDER BY id DESC LIMIT 1` — no broken ORDER BY+LIMIT in UPDATE — but the inline-sqlite anti-pattern is real.)
**Impact:** Two consequences. (1) Under concurrent load (Phase 3 agent runner writing alongside voice handler writing) the codec.py UPDATE can hit "database is locked" because it skips the WAL+busy_timeout setup that `routes/_shared.get_db()` applies (`routes/_shared.py`). (2) Bypasses any future hook/audit you'd add to CodecMemory.
**Recommended fix:** Replace `codec.py:716-721` with `from codec_memory import CodecMemory; CodecMemory().update_session_response(rid, answer[:500])` (adding the method to CodecMemory if needed). Audit `codec_dashboard.py` for the same anti-pattern elsewhere (qchat_db / vibe_db are already lazy-cached globals with pragmas set — those are OK).
Expand Down
3 changes: 2 additions & 1 deletion docs/audits/PHASE-1-CONSOLIDATED-TRIAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,8 @@ Mirror the Intake Phase 3 wave pattern. 7 waves planned; sizes are PR-counts, NO
- PR-3E: A-11 + A-12 — unify vision + 51-site `chat/completions` through `codec_llm_proxy`
- PR-3F (optional, large): A-19 — bridge unification (iMessage + Telegram → `BridgeRouter`)
- PR-3G: small misc ✅ (branch `fix/pr3g-small-misc-cleanup`) — closed A-9 (DISABLED overlay, ~90 LOC), A-10 (run_session_module, 33 LOC + orphan `import sys`), A-14 (close_session shadow import), A-18 (9 unused Pydantic models + dead typing import). A-13 (dashboard pattern blocker) verified **already closed by PR-2C**. 6 regression tests; zero net-new ruff (net −); full suite 1344 passing. **Deferred from this batch (each needs its own focused PR):** A-8 (codec_keyboard.py 398 LOC — verify-first delete-or-migrate decision), A-15 (config_version — additive migration feature touching `load_config`), A-20 (inline sqlite in the live dispatch path — reliability fix needing a CodecMemory method).
- A-15: config schema versioning ✅ (branch `fix/pr3-a15-config-versioning`; `CONFIG_SCHEMA_VERSION=1` + migration ladder + idempotent atomic write-back in `load_config`; never creates-on-missing or overwrites-corrupt; 12 tests; zero net-new ruff; full suite 1356 passing). Still deferred: A-8, A-20.
- A-15: config schema versioning ✅ (branch `fix/pr3-a15-config-versioning`; `CONFIG_SCHEMA_VERSION=1` + migration ladder + idempotent atomic write-back in `load_config`; never creates-on-missing or overwrites-corrupt; 12 tests; zero net-new ruff; full suite 1356 passing).
- A-20: inline-sqlite reliability fix ✅ (branch `fix/pr3-a20-inline-sqlite`; added `codec_core._db_connect()` with WAL+busy_timeout + `update_session_response()`; replaced codec.py's inline lock-prone UPDATE; retrofitted all 4 codec_core session connects; removed now-unused sqlite3/DB_PATH imports; 9 tests; net-negative ruff; full suite 1365 passing). **Still deferred: A-8** (codec_keyboard.py 398 LOC — verify-first delete-or-migrate).

**Rationale:** Audit A is the broadest in scope — clean up patterns and dead code. PR-3A alone deletes ~730 LOC and improves the first-impression of the most-read file.

Expand Down
129 changes: 129 additions & 0 deletions tests/test_session_response_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""Tests for A-20 — inline-sqlite UPDATE replaced by codec_core helper.

codec.py's voice handler opened a bare `sqlite3.connect(DB_PATH)` for one
`UPDATE sessions SET response=? WHERE id=?` with no WAL/busy_timeout —
risking `database is locked` under concurrent agent-runner + voice writes.
Now it routes through `codec_core.update_session_response`, and all
codec_core session helpers share a `_db_connect()` with WAL + busy_timeout.

Reference: docs/audits/PHASE-1-CODE-QUALITY.md finding A-20.
"""
from __future__ import annotations

import sqlite3
import sys
from pathlib import Path

import pytest

REPO = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(REPO))

import codec_core # noqa: E402


@pytest.fixture
def tmp_db(tmp_path, monkeypatch):
"""Point codec_core at a throwaway DB + init the schema."""
db = tmp_path / "memory.db"
monkeypatch.setattr(codec_core, "DB_PATH", str(db))
codec_core.init_db()
return db


# ── _db_connect pragmas ──────────────────────────────────────────────────────


def test_db_connect_sets_wal_and_busy_timeout(tmp_db):
c = codec_core._db_connect()
try:
mode = c.execute("PRAGMA journal_mode").fetchone()[0]
busy = c.execute("PRAGMA busy_timeout").fetchone()[0]
finally:
c.close()
assert mode.lower() == "wal", f"expected WAL, got {mode!r}"
assert busy == 5000, f"expected busy_timeout 5000, got {busy}"


# ── update_session_response round-trip ───────────────────────────────────────


def test_update_session_response_round_trip(tmp_db):
rid = codec_core.save_task("translate this", "CODEC")
assert codec_core.update_session_response(rid, "voila — translated") is True
# Read it back directly
c = sqlite3.connect(str(tmp_db))
row = c.execute("SELECT response FROM sessions WHERE id=?", (rid,)).fetchone()
c.close()
assert row[0] == "voila — translated"


def test_update_session_response_truncates_to_500(tmp_db):
rid = codec_core.save_task("long one", "CODEC")
codec_core.update_session_response(rid, "x" * 1000)
c = sqlite3.connect(str(tmp_db))
row = c.execute("SELECT response FROM sessions WHERE id=?", (rid,)).fetchone()
c.close()
assert len(row[0]) == 500


def test_update_session_response_none_rid_returns_false(tmp_db):
assert codec_core.update_session_response(None, "anything") is False


def test_update_session_response_nonexistent_rid_no_crash(tmp_db):
# UPDATE affecting 0 rows is not an error — must not raise, returns True.
assert codec_core.update_session_response(999999, "ghost") is True


def test_update_session_response_coerces_non_str(tmp_db):
rid = codec_core.save_task("num", "CODEC")
assert codec_core.update_session_response(rid, 12345) is True
c = sqlite3.connect(str(tmp_db))
row = c.execute("SELECT response FROM sessions WHERE id=?", (rid,)).fetchone()
c.close()
assert row[0] == "12345"


def test_update_session_response_surfaces_in_get_memory(tmp_db):
rid = codec_core.save_task("weather in Paris", "CODEC")
codec_core.update_session_response(rid, "It's sunny, 22C")
mem = codec_core.get_memory(n=5)
assert "weather in Paris" in mem
assert "sunny" in mem


# ── Source-level invariant ───────────────────────────────────────────────────


def test_codec_no_inline_sqlite_update():
"""codec.py must no longer open a raw sqlite3 connection for the session
response UPDATE — it routes through codec_core.update_session_response."""
src = (REPO / "codec.py").read_text()
assert "UPDATE sessions SET response" not in src, (
"inline UPDATE sessions must be gone from codec.py (A-20)"
)
assert "update_session_response(rid" in src, (
"codec.py must call codec_core.update_session_response"
)


def test_codec_core_session_helpers_use_db_connect():
"""The only bare `sqlite3.connect(DB_PATH)` left in codec_core is the one
INSIDE `_db_connect` (the canonical helper). All session helpers
(init_db/save_task/update_session_response/get_memory/get_recent_*) route
through `_db_connect()`. String-template connects in the deprecated
build_session_script (`L.append("...")`) don't count."""
src = (REPO / "codec_core.py").read_text()
bare = 0
for line in src.splitlines():
stripped = line.strip()
if stripped.startswith("L.append"):
continue
if stripped == "c = sqlite3.connect(DB_PATH)":
bare += 1
assert bare == 1, (
f"expected exactly ONE bare connect (inside _db_connect); found {bare}"
)
# And the helpers call _db_connect
assert src.count("_db_connect()") >= 4, "session helpers must use _db_connect()"
Loading