Skip to content

Commit 236cf8f

Browse files
authored
fix: address review findings (security + bugs) in stop-judge.py (#159)
- Security: move throttle file from /tmp to ~/.claude/qyl-continuation/ (symlink attack) - Bug: remove stop_hook_active gate — field doesn't exist in Stop payload - Bug: read msg.message.content for transcript entries (H1-H4 were inoperative) - Bug: read transcript in Python instead of shelling out to tail - Bug: byte-aware truncation for MAX_CONTEXT_BYTES - Logic: H3/H4 now check for NEXT_STEP_RX before stopping - Robustness: handle structured_output being null
1 parent 57f8db0 commit 236cf8f

File tree

1 file changed

+46
-30
lines changed

1 file changed

+46
-30
lines changed

plugins/qyl-continuation/hooks/stop-judge.py

Lines changed: 46 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
JUDGE_TIMEOUT = int(os.environ.get("QYL_CONTINUATION_TIMEOUT", "30"))
2323
JUDGE_MODEL = os.environ.get("QYL_CONTINUATION_MODEL", "haiku")
2424

25+
WORK_DIR = Path.home() / ".claude" / "qyl-continuation"
26+
WORK_DIR.mkdir(parents=True, exist_ok=True)
27+
2528

2629
def approve(reason: str) -> NoReturn:
2730
json.dump({"decision": "approve", "reason": reason}, sys.stdout)
@@ -39,7 +42,7 @@ def block(reason: str) -> NoReturn:
3942
event = json.loads(sys.stdin.read())
4043
transcript_path = event.get("transcript_path", "")
4144
session_id = event.get("session_id", "unknown")
42-
throttle_file = Path(f"/tmp/.qyl-continue-{session_id.replace('/', '_')}")
45+
throttle_file = WORK_DIR / f"throttle-{session_id.replace('/', '_')}"
4346

4447

4548
def read_throttle() -> tuple[int, float]:
@@ -60,13 +63,13 @@ def clear_throttle() -> None:
6063
throttle_file.unlink(missing_ok=True)
6164

6265

63-
if event.get("stop_hook_active", False):
64-
count, last_time = read_throttle()
65-
if time.time() - last_time > WINDOW_SECONDS:
66-
count = 0
67-
if count >= MAX_CONTINUATIONS:
68-
clear_throttle()
69-
approve("Max continuation cycles reached")
66+
# Throttle guard — always check, not gated on stop_hook_active
67+
count, last_time = read_throttle()
68+
if time.time() - last_time > WINDOW_SECONDS:
69+
count = 0
70+
if count >= MAX_CONTINUATIONS:
71+
clear_throttle()
72+
approve("Max continuation cycles reached")
7073

7174

7275
# --- Load transcript ---
@@ -75,12 +78,12 @@ def clear_throttle() -> None:
7578
approve("No transcript")
7679

7780
try:
78-
tail = subprocess.run(
79-
["tail", "-n", str(TAIL_LINES), transcript_path],
80-
capture_output=True, text=True, timeout=5
81-
)
82-
lines = [l.strip() for l in tail.stdout.strip().split("\n") if l.strip()]
83-
except (subprocess.TimeoutExpired, OSError):
81+
p = Path(transcript_path)
82+
raw = p.read_bytes()
83+
tail_bytes = raw[-(TAIL_LINES * 8192):] if len(raw) > TAIL_LINES * 8192 else raw
84+
lines = [l.strip() for l in tail_bytes.decode("utf-8", errors="ignore").split("\n") if l.strip()]
85+
lines = lines[-TAIL_LINES:]
86+
except OSError:
8487
approve("Failed to read transcript")
8588

8689
messages: list[dict] = []
@@ -96,12 +99,19 @@ def clear_throttle() -> None:
9699

97100
# --- Transcript helpers ---
98101

102+
def _get_content(msg: dict):
103+
content = msg.get("content", "")
104+
if not content and isinstance(msg.get("message"), dict):
105+
content = msg["message"].get("content", "")
106+
return content
107+
108+
99109
def role_of(msg: dict) -> str:
100110
return msg.get("role", msg.get("type", ""))
101111

102112

103113
def text_of(msg: dict) -> str:
104-
content = msg.get("content", "")
114+
content = _get_content(msg)
105115
if isinstance(content, str):
106116
return content
107117
if isinstance(content, list):
@@ -114,14 +124,19 @@ def text_of(msg: dict) -> str:
114124

115125

116126
def has_block_type(msg: dict, block_type: str) -> bool:
117-
content = msg.get("content", "")
127+
content = _get_content(msg)
118128
return isinstance(content, list) and any(
119129
isinstance(b, dict) and b.get("type") == block_type for b in content
120130
)
121131

122132

123133
# --- Phase 1: Heuristics ---
124134

135+
NEXT_STEP_RX = re.compile(
136+
r"(?i)(?:next|now) (?:I(?:'ll| will| need to)|let me)|"
137+
r"moving on to|(?:still|also) need to|remaining (?:items|tasks|steps)"
138+
)
139+
125140
last_assistant = ""
126141
for msg in reversed(messages):
127142
if role_of(msg) == "assistant":
@@ -145,15 +160,11 @@ def has_block_type(msg: dict, block_type: str) -> bool:
145160
r"(?i)that(?:'s| should be) (?:it|all|everything)|"
146161
r"(?i)here(?:'s| is) (?:the|your|a) (?:summary|result|output)"
147162
)
148-
NEXT_STEP_RX = re.compile(
149-
r"(?i)(?:next|now) (?:I(?:'ll| will| need to)|let me)|"
150-
r"moving on to|(?:still|also) need to|remaining (?:items|tasks|steps)"
151-
)
152163
if last_assistant and COMPLETION_RX.search(last_assistant) and not NEXT_STEP_RX.search(last_assistant):
153164
clear_throttle()
154165
approve("Heuristic: completion signal")
155166

156-
# H3: Tool result already addressed — assistant text after tool result
167+
# H3: Tool result already addressed — assistant text after tool result (but not if next steps stated)
157168
saw_tool = False
158169
addressed = False
159170
for msg in messages:
@@ -164,21 +175,27 @@ def has_block_type(msg: dict, block_type: str) -> bool:
164175
addressed = True
165176

166177
last_role = role_of(messages[-1]) if messages else ""
167-
if addressed and last_role == "assistant":
178+
if addressed and last_role == "assistant" and not NEXT_STEP_RX.search(last_assistant):
168179
clear_throttle()
169180
approve("Heuristic: tool results already addressed")
170181

171-
# H4: Substantial text-only response (no pending tool calls)
182+
# H4: Substantial text-only response (no pending tool calls, no stated next steps)
172183
last_msg = messages[-1]
173184
if role_of(last_msg) == "assistant" and not has_block_type(last_msg, "tool_use"):
174-
if len(text_of(last_msg)) > 100:
185+
text = text_of(last_msg)
186+
if len(text) > 100 and not NEXT_STEP_RX.search(text):
175187
clear_throttle()
176188
approve("Heuristic: substantial text-only response")
177189

178190

179191
# --- Phase 2: Haiku judge ---
180192

181-
transcript_json = json.dumps(messages, ensure_ascii=False)[:MAX_CONTEXT_BYTES]
193+
raw_transcript = json.dumps(messages, ensure_ascii=False)
194+
transcript_bytes = raw_transcript.encode("utf-8")
195+
if len(transcript_bytes) > MAX_CONTEXT_BYTES:
196+
transcript_json = transcript_bytes[:MAX_CONTEXT_BYTES].decode("utf-8", errors="ignore")
197+
else:
198+
transcript_json = raw_transcript
182199

183200
EVAL_PROMPT = f"""Does the assistant have more autonomous work to do RIGHT NOW?
184201
@@ -198,8 +215,6 @@ def has_block_type(msg: dict, block_type: str) -> bool:
198215
A tool_result or image does NOT mean unaddressed work — check for assistant text AFTER it.
199216
Default: STOP."""
200217

201-
work_dir = Path.home() / ".claude" / "qyl-continuation"
202-
work_dir.mkdir(parents=True, exist_ok=True)
203218
env = os.environ.copy()
204219
env["CLAUDE_HOOK_JUDGE_MODE"] = "true"
205220

@@ -211,7 +226,7 @@ def has_block_type(msg: dict, block_type: str) -> bool:
211226
"--system-prompt", "Conversation state classifier. Output JSON only. No code, no tools.",
212227
"--disallowedTools", "*"],
213228
input=EVAL_PROMPT, capture_output=True, text=True,
214-
timeout=JUDGE_TIMEOUT, cwd=str(work_dir), env=env,
229+
timeout=JUDGE_TIMEOUT, cwd=str(WORK_DIR), env=env,
215230
)
216231
except (subprocess.TimeoutExpired, OSError):
217232
approve("Judge unavailable, allowing stop")
@@ -220,8 +235,9 @@ def has_block_type(msg: dict, block_type: str) -> bool:
220235
approve("Judge failed, allowing stop")
221236

222237
try:
223-
evaluation = json.loads(proc.stdout).get("structured_output", {})
224-
except (json.JSONDecodeError, AttributeError, TypeError):
238+
output = json.loads(proc.stdout)
239+
evaluation = (output.get("structured_output") or {}) if isinstance(output, dict) else {}
240+
except json.JSONDecodeError:
225241
approve("Judge response unparseable, allowing stop")
226242

227243
if evaluation.get("should_continue", False):

0 commit comments

Comments
 (0)