1- import argparse
1+ from __future__ import annotations
2+
23import asyncio
34import datetime
45import json
1112import structlog .stdlib
1213from tqdm .asyncio import tqdm_asyncio
1314
14- from paperbench .monitor .create_monitor import create_monitor
15+ import chz
16+ from paperbench .monitor .monitor import BasicMonitor , Monitor
1517from paperbench .paper_registry import paper_registry
1618
1719logger = structlog .stdlib .get_logger (component = __name__ )
1820
1921
22+ def _describe_monitor_config (monitor_config : Monitor .Config ) -> str :
23+ return f"{ monitor_config .__class__ .__module__ } .{ monitor_config .__class__ .__qualname__ } "
24+
25+
@chz.chz
class MonitorCLIArgs:
    """Monitor agent logs for violations."""

    # Root directory; each selected run group is resolved as a child of it.
    logs_dir: Path = chz.field(doc="Directory containing multiple run groups.")

    # Defaults to an empty list, which the CLI adapter converts to ``None``
    # before handing it to ``main``.
    run_groups: list[str] = chz.field(
        default_factory=list,
        doc="List of run group IDs to monitor.",
    )

    # Configuration object whose ``build(paper=...)`` yields the monitor that
    # is run against each log.
    monitor_config: Monitor.Config = chz.field(
        default_factory=BasicMonitor.Config,
        doc="Specify the monitor to use (default: BasicMonitor).",
    )

    # When set, ``main`` creates this directory (including parents) up front.
    out_dir: Path | None = chz.field(
        default=None,
        doc="Directory to save the monitor results JSON file (default: current directory).",
    )
45+
46+
def get_paper_id_from_run_id(run_id: str) -> str:
    """Return the paper ID that prefixes a run ID.

    The paper ID is everything before the first underscore, e.g.
    ``'rice_508398cb-0825-4bf0-b647-a9200ac03d21'`` -> ``'rice'``. A run ID
    without an underscore is returned unchanged.
    """
    paper_id, _separator, _suffix = run_id.partition("_")
    return paper_id
2350
2451
2552async def monitor_single_log (
2653 run_dir : Path ,
27- monitor_type : str ,
54+ monitor_config : Monitor . Config ,
2855) -> dict [str , Any ] | None :
2956 """
3057 Monitor a single run's log with the specified monitor.
@@ -67,22 +94,24 @@ async def monitor_single_log(
6794 logger .warning (f"Log file not found at { log_file } " )
6895 return None
6996
70- logger .info (f"Running monitor on agent.log from { run_id } " )
97+ monitor_config_payload = monitor_config .model_dump (mode = "json" )
98+ logger .info (
99+ f"Running monitor on agent.log from { run_id } " ,
100+ monitor = _describe_monitor_config (monitor_config ),
101+ monitor_config_json = json .dumps (monitor_config_payload , indent = 2 ),
102+ )
71103
72104 # Create monitor
73105 paper = paper_registry .get_paper (paper_id )
74- monitor = create_monitor (
75- monitor_type = monitor_type ,
76- paper = paper ,
77- monitor_kwargs = {},
78- )
106+ monitor = monitor_config .build (paper = paper )
79107
80108 # Run monitor on the log file
81109 result = await asyncio .to_thread (monitor .check_log , log_file .as_posix ())
82110
83111 return {
84112 "run_group_id" : run_dir .parent .name ,
85- "monitor_type" : monitor_type ,
113+ "monitor_type" : _describe_monitor_config (monitor_config ),
114+ "monitor_config" : monitor_config_payload ,
86115 "paper_id" : paper_id ,
87116 "log_file" : str (log_file ),
88117 "run_id" : run_id ,
@@ -103,7 +132,7 @@ async def monitor_single_log(
103132
104133async def monitor_run_group (
105134 group_dir : Path ,
106- monitor_type : str ,
135+ monitor_config : Monitor . Config ,
107136) -> list [dict [str , Any ] | None ]:
108137 """Monitor all runs in a run group directory."""
109138 run_group_id = group_dir .name
@@ -115,7 +144,7 @@ async def monitor_run_group(
115144 tasks = [
116145 monitor_single_log (
117146 run_dir = run_dir ,
118- monitor_type = monitor_type ,
147+ monitor_config = monitor_config ,
119148 )
120149 for run_dir in run_dirs
121150 ]
@@ -126,7 +155,7 @@ async def monitor_run_group(
126155
127156async def monitor_multiple_run_groups (
128157 logs_dir : Path ,
129- monitor_type : str ,
158+ monitor_config : Monitor . Config ,
130159 run_groups : list [str ] | None = None ,
131160) -> dict [str , Any ] | None :
132161 """Run monitor on multiple run groups that are in a directory of run groups."""
@@ -154,7 +183,7 @@ async def monitor_multiple_run_groups(
154183 tasks = [
155184 monitor_run_group (
156185 group_dir = logs_dir / run_group_id ,
157- monitor_type = monitor_type ,
186+ monitor_config = monitor_config ,
158187 )
159188 for run_group_id in run_groups
160189 ]
@@ -169,10 +198,13 @@ async def monitor_multiple_run_groups(
169198 flagged_results = [result for result in all_results if len (result ["results" ]["violations" ]) > 0 ]
170199 other_results = [result for result in all_results if len (result ["results" ]["violations" ]) == 0 ]
171200
201+ monitor_config_payload = monitor_config .model_dump (mode = "json" )
202+
172203 # Create final output with results and summary
173204 return {
174205 "timestamp" : datetime .datetime .now ().isoformat (),
175- "monitor_type" : monitor_type ,
206+ "monitor_type" : _describe_monitor_config (monitor_config ),
207+ "monitor_config" : monitor_config_payload ,
176208 "logs_dir" : str (logs_dir .absolute ()),
177209 "run_groups" : run_groups ,
178210 "total_runs" : len (all_results ),
@@ -184,21 +216,23 @@ async def monitor_multiple_run_groups(
184216
185217
186218async def main (
187- monitor_type : str ,
188219 logs_dir : Path ,
220+ monitor_config : Monitor .Config ,
189221 run_groups : list [str ] | None = None ,
190222 out_dir : Path | None = None ,
191223) -> None :
192224 """
193225 Main function to run the monitor on a directory of logs.
194226 """
195227
228+ monitor_config = monitor_config .model_copy ()
229+
196230 if out_dir :
197231 out_dir .mkdir (parents = True , exist_ok = True )
198232
199233 results = await monitor_multiple_run_groups (
200234 logs_dir = logs_dir ,
201- monitor_type = monitor_type ,
235+ monitor_config = monitor_config ,
202236 run_groups = run_groups ,
203237 )
204238
@@ -211,41 +245,14 @@ async def main(
211245 logger .info (f"All monitor results written to { output_file } " )
212246
213247
214- if __name__ == "__main__" :
215- parser = argparse .ArgumentParser (description = "Monitor agent logs for violations." )
216- parser .add_argument (
217- "--logs-dir" ,
218- type = Path ,
219- help = "Directory containing multiple run groups." ,
220- required = True ,
221- )
222- parser .add_argument (
223- "--run-groups" ,
224- nargs = "+" ,
225- help = "List of run group IDs to monitor." ,
226- required = False ,
227- )
228- parser .add_argument (
229- "-m" ,
230- "--monitor" ,
231- choices = ["basic" ],
232- default = "basic" ,
233- help = "Specify the monitor to use (default: basic)." ,
234- )
235- parser .add_argument (
236- "--out-dir" ,
237- type = Path ,
238- help = "Directory to save the monitor results JSON file (default: current directory)." ,
239- required = False ,
async def _run_from_cli(args: MonitorCLIArgs) -> None:
    """Unpack parsed CLI arguments and forward them to ``main``.

    Args:
        args: The populated ``MonitorCLIArgs`` instance.
    """
    # An empty ``run_groups`` list (the CLI default) is passed on as ``None``.
    selected_run_groups = args.run_groups or None

    await main(
        logs_dir=args.logs_dir,
        monitor_config=args.monitor_config,
        run_groups=selected_run_groups,
        out_dir=args.out_dir,
    )
241255
242- args = parser .parse_args ()
243256
244- asyncio .run (
245- main (
246- monitor_type = args .monitor ,
247- logs_dir = args .logs_dir ,
248- run_groups = args .run_groups ,
249- out_dir = args .out_dir ,
250- )
251- )
if __name__ == "__main__":
    # Script entry point: the value returned by chz.nested_entrypoint for the
    # async ``_run_from_cli`` adapter is run to completion on a new event loop.
    # NOTE(review): presumably chz builds MonitorCLIArgs from argv here and
    # calls the adapter with it -- confirm against the chz docs.
    asyncio.run(chz.nested_entrypoint(_run_from_cli))
0 commit comments