fix: address review findings (security + bugs) in stop-judge.py (#159)

ANcpLua · web-flow · commit 236cf8f7a861 · 2026-03-06T20:28:47.000+01:00
- Security: move throttle file from /tmp to ~/.claude/qyl-continuation/ (symlink attack)
- Bug: remove stop_hook_active gate — field doesn't exist in Stop payload
- Bug: read msg.message.content for transcript entries (H1-H4 were inoperative)
- Bug: read transcript in Python instead of shelling out to tail
- Bug: byte-aware truncation for MAX_CONTEXT_BYTES
- Logic: H3/H4 now check for NEXT_STEP_RX before stopping
- Robustness: handle structured_output being null
diff --git a/plugins/qyl-continuation/hooks/stop-judge.py b/plugins/qyl-continuation/hooks/stop-judge.py
@@ -22,6 +22,9 @@
 JUDGE_TIMEOUT = int(os.environ.get("QYL_CONTINUATION_TIMEOUT", "30"))
 JUDGE_MODEL = os.environ.get("QYL_CONTINUATION_MODEL", "haiku")
 
+WORK_DIR = Path.home() / ".claude" / "qyl-continuation"
+WORK_DIR.mkdir(parents=True, exist_ok=True)
+
 
 def approve(reason: str) -> NoReturn:
     json.dump({"decision": "approve", "reason": reason}, sys.stdout)
@@ -39,7 +42,7 @@ def block(reason: str) -> NoReturn:
 event = json.loads(sys.stdin.read())
 transcript_path = event.get("transcript_path", "")
 session_id = event.get("session_id", "unknown")
-throttle_file = Path(f"/tmp/.qyl-continue-{session_id.replace('/', '_')}")
+throttle_file = WORK_DIR / f"throttle-{session_id.replace('/', '_')}"
 
 
 def read_throttle() -> tuple[int, float]:
@@ -60,13 +63,13 @@ def clear_throttle() -> None:
     throttle_file.unlink(missing_ok=True)
 
 
-if event.get("stop_hook_active", False):
-    count, last_time = read_throttle()
-    if time.time() - last_time > WINDOW_SECONDS:
-        count = 0
-    if count >= MAX_CONTINUATIONS:
-        clear_throttle()
-        approve("Max continuation cycles reached")
+# Throttle guard — always check, not gated on stop_hook_active
+count, last_time = read_throttle()
+if time.time() - last_time > WINDOW_SECONDS:
+    count = 0
+if count >= MAX_CONTINUATIONS:
+    clear_throttle()
+    approve("Max continuation cycles reached")
 
 
 # --- Load transcript ---
@@ -75,12 +78,12 @@ def clear_throttle() -> None:
     approve("No transcript")
 
 try:
-    tail = subprocess.run(
-        ["tail", "-n", str(TAIL_LINES), transcript_path],
-        capture_output=True, text=True, timeout=5
-    )
-    lines = [l.strip() for l in tail.stdout.strip().split("\n") if l.strip()]
-except (subprocess.TimeoutExpired, OSError):
+    p = Path(transcript_path)
+    raw = p.read_bytes()
+    tail_bytes = raw[-(TAIL_LINES * 8192):] if len(raw) > TAIL_LINES * 8192 else raw
+    lines = [l.strip() for l in tail_bytes.decode("utf-8", errors="ignore").split("\n") if l.strip()]
+    lines = lines[-TAIL_LINES:]
+except OSError:
     approve("Failed to read transcript")
 
 messages: list[dict] = []
@@ -96,12 +99,19 @@ def clear_throttle() -> None:
 
 # --- Transcript helpers ---
 
+def _get_content(msg: dict):
+    content = msg.get("content", "")
+    if not content and isinstance(msg.get("message"), dict):
+        content = msg["message"].get("content", "")
+    return content
+
+
 def role_of(msg: dict) -> str:
     return msg.get("role", msg.get("type", ""))
 
 
 def text_of(msg: dict) -> str:
-    content = msg.get("content", "")
+    content = _get_content(msg)
     if isinstance(content, str):
         return content
     if isinstance(content, list):
@@ -114,14 +124,19 @@ def text_of(msg: dict) -> str:
 
 
 def has_block_type(msg: dict, block_type: str) -> bool:
-    content = msg.get("content", "")
+    content = _get_content(msg)
     return isinstance(content, list) and any(
         isinstance(b, dict) and b.get("type") == block_type for b in content
     )
 
 
 # --- Phase 1: Heuristics ---
 
+NEXT_STEP_RX = re.compile(
+    r"(?i)(?:next|now) (?:I(?:'ll| will| need to)|let me)|"
+    r"moving on to|(?:still|also) need to|remaining (?:items|tasks|steps)"
+)
+
 last_assistant = ""
 for msg in reversed(messages):
     if role_of(msg) == "assistant":
@@ -145,15 +160,11 @@ def has_block_type(msg: dict, block_type: str) -> bool:
     r"(?i)that(?:'s| should be) (?:it|all|everything)|"
     r"(?i)here(?:'s| is) (?:the|your|a) (?:summary|result|output)"
 )
-NEXT_STEP_RX = re.compile(
-    r"(?i)(?:next|now) (?:I(?:'ll| will| need to)|let me)|"
-    r"moving on to|(?:still|also) need to|remaining (?:items|tasks|steps)"
-)
 if last_assistant and COMPLETION_RX.search(last_assistant) and not NEXT_STEP_RX.search(last_assistant):
     clear_throttle()
     approve("Heuristic: completion signal")
 
-# H3: Tool result already addressed — assistant text after tool result
+# H3: Tool result already addressed — assistant text after tool result (but not if next steps stated)
 saw_tool = False
 addressed = False
 for msg in messages:
@@ -164,21 +175,27 @@ def has_block_type(msg: dict, block_type: str) -> bool:
         addressed = True
 
 last_role = role_of(messages[-1]) if messages else ""
-if addressed and last_role == "assistant":
+if addressed and last_role == "assistant" and not NEXT_STEP_RX.search(last_assistant):
     clear_throttle()
     approve("Heuristic: tool results already addressed")
 
-# H4: Substantial text-only response (no pending tool calls)
+# H4: Substantial text-only response (no pending tool calls, no stated next steps)
 last_msg = messages[-1]
 if role_of(last_msg) == "assistant" and not has_block_type(last_msg, "tool_use"):
-    if len(text_of(last_msg)) > 100:
+    text = text_of(last_msg)
+    if len(text) > 100 and not NEXT_STEP_RX.search(text):
         clear_throttle()
         approve("Heuristic: substantial text-only response")
 
 
 # --- Phase 2: Haiku judge ---
 
-transcript_json = json.dumps(messages, ensure_ascii=False)[:MAX_CONTEXT_BYTES]
+raw_transcript = json.dumps(messages, ensure_ascii=False)
+transcript_bytes = raw_transcript.encode("utf-8")
+if len(transcript_bytes) > MAX_CONTEXT_BYTES:
+    transcript_json = transcript_bytes[:MAX_CONTEXT_BYTES].decode("utf-8", errors="ignore")
+else:
+    transcript_json = raw_transcript
 
 EVAL_PROMPT = f"""Does the assistant have more autonomous work to do RIGHT NOW?
 
@@ -198,8 +215,6 @@ def has_block_type(msg: dict, block_type: str) -> bool:
 A tool_result or image does NOT mean unaddressed work — check for assistant text AFTER it.
 Default: STOP."""
 
-work_dir = Path.home() / ".claude" / "qyl-continuation"
-work_dir.mkdir(parents=True, exist_ok=True)
 env = os.environ.copy()
 env["CLAUDE_HOOK_JUDGE_MODE"] = "true"
 
@@ -211,7 +226,7 @@ def has_block_type(msg: dict, block_type: str) -> bool:
          "--system-prompt", "Conversation state classifier. Output JSON only. No code, no tools.",
          "--disallowedTools", "*"],
         input=EVAL_PROMPT, capture_output=True, text=True,
-        timeout=JUDGE_TIMEOUT, cwd=str(work_dir), env=env,
+        timeout=JUDGE_TIMEOUT, cwd=str(WORK_DIR), env=env,
     )
 except (subprocess.TimeoutExpired, OSError):
     approve("Judge unavailable, allowing stop")
@@ -220,8 +235,9 @@ def has_block_type(msg: dict, block_type: str) -> bool:
     approve("Judge failed, allowing stop")
 
 try:
-    evaluation = json.loads(proc.stdout).get("structured_output", {})
-except (json.JSONDecodeError, AttributeError, TypeError):
+    output = json.loads(proc.stdout)
+    evaluation = (output.get("structured_output") or {}) if isinstance(output, dict) else {}
+except json.JSONDecodeError:
     approve("Judge response unparseable, allowing stop")
 
 if evaluation.get("should_continue", False):