-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy paththinking_repair.py
More file actions
107 lines (87 loc) · 3.33 KB
/
thinking_repair.py
File metadata and controls
107 lines (87 loc) · 3.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python3
"""Repair conversation JSONL files missing thinking blocks in assistant messages with tool calls."""
import argparse
import json
import sys
from pathlib import Path
# Stand-in "thinking" content block that is prepended to assistant messages
# whose tool calls are not accompanied by any thinking block.
PLACEHOLDER_THINKING: dict[str, str] = {
    "type": "thinking",
    "thinking": "(reasoning content was not preserved)",
}
def needs_repair(message: dict) -> bool:
    """Return True if an assistant message has tool calls but no thinking block.

    Args:
        message: One decoded JSONL record. Anything that is not a dict
            (a JSON array, string, number, ``null``) is accepted and simply
            reported as not needing repair instead of raising.

    Returns:
        True only when ``role`` is ``"assistant"``, ``content`` is a list,
        at least one dict block in it has ``type == "tool_use"``, and no dict
        block has ``type == "thinking"``.
    """
    # A JSONL line may legally decode to a non-object value; such a value
    # cannot be an assistant message, so there is nothing to repair.
    if not isinstance(message, dict) or message.get("role") != "assistant":
        return False

    content = message.get("content")
    if not isinstance(content, list):
        return False

    # Collect into a list (not a set) so an unhashable "type" value cannot raise.
    # NOTE(review): only "thinking" counts as a thinking block here; confirm
    # whether other block types (e.g. "redacted_thinking") should count too.
    block_types = [block.get("type") for block in content if isinstance(block, dict)]
    return "tool_use" in block_types and "thinking" not in block_types
def repair_message(message: dict) -> dict:
    """Return a shallow copy of *message* whose content starts with a placeholder thinking block.

    The input dict and its original content list are left unmodified; the
    copy receives a new list made of a fresh copy of ``PLACEHOLDER_THINKING``
    followed by the original content blocks.
    """
    patched = message.copy()
    placeholder = PLACEHOLDER_THINKING.copy()
    patched["content"] = [placeholder, *patched["content"]]
    return patched
def repair_file(path: Path, *, in_place: bool = False, dry_run: bool = False) -> int:
    """Scan a conversation JSONL file and repair messages lacking a thinking block.

    Each non-blank line is parsed as JSON. Blank lines and lines that fail to
    parse are passed through unchanged (the latter with a warning on stderr).
    Every line that parses is re-serialized with ``json.dumps``; messages for
    which ``needs_repair`` is true are first passed through ``repair_message``.

    Args:
        path: JSONL file to scan. It is read, and in in-place mode written,
            as UTF-8.
        in_place: Write the result back to ``path`` instead of stdout. The
            file is only rewritten when at least one message was repaired.
        dry_run: Only report on stderr what would be repaired; produce no
            output and leave the file alone.

    Returns:
        Number of messages that were (or, for a dry run, would be) repaired.
    """
    text = path.read_text(encoding="utf-8")

    # Split on "\n" only. str.splitlines() also breaks on U+2028, U+2029 and
    # U+0085, which may appear unescaped inside a JSON string and would cut a
    # single record into two invalid lines.
    lines = text.split("\n")
    if lines[-1] == "":
        lines.pop()  # empty tail produced by a trailing newline (or empty file)

    repaired_count = 0
    output_lines = []
    for lineno, line in enumerate(lines, 1):
        stripped = line.strip()
        if not stripped:
            output_lines.append(line)
            continue

        try:
            message = json.loads(stripped)
        except json.JSONDecodeError:
            print(f"warning: skipping invalid JSON at line {lineno}", file=sys.stderr)
            output_lines.append(line)
            continue

        # Only JSON objects can be messages; arrays, strings, numbers and
        # null are passed through without being inspected.
        if isinstance(message, dict) and needs_repair(message):
            repaired_count += 1
            if not dry_run:
                message = repair_message(message)
            print(
                f" line {lineno}: assistant message with tool calls missing thinking block",
                file=sys.stderr,
            )
        output_lines.append(json.dumps(message))

    if dry_run:
        return repaired_count

    output_text = "".join(f"{out_line}\n" for out_line in output_lines)
    if in_place:
        # Leave files that need no repair untouched on disk.
        if repaired_count > 0:
            path.write_text(output_text, encoding="utf-8")
    else:
        # In stdout mode always emit the file, even when nothing was
        # repaired, so redirecting the output never yields an empty file
        # for input that was already clean.
        sys.stdout.write(output_text)
    return repaired_count
def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(
description="Repair conversation JSONL files missing thinking blocks.",
)
parser.add_argument("files", nargs="+", type=Path, help="JSONL files to repair")
parser.add_argument(
"-i", "--in-place", action="store_true",
help="modify files in place instead of writing to stdout",
)
parser.add_argument(
"-n", "--dry-run", action="store_true",
help="report issues without modifying anything",
)
args = parser.parse_args(argv)
total = 0
for filepath in args.files:
if not filepath.exists():
print(f"error: {filepath} not found", file=sys.stderr)
return 1
print(f"scanning {filepath}...", file=sys.stderr)
total += repair_file(filepath, in_place=args.in_place, dry_run=args.dry_run)
label = "would repair" if args.dry_run else "repaired"
print(f"{label} {total} message(s)", file=sys.stderr)
return 0
# Script entry point: run the CLI and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())