Skip to content

Commit 2b4633a

Browse files
update
1 parent 1de1e51 commit 2b4633a

File tree

1 file changed

+149
-172
lines changed
  • src/agentlab/agents/human_trace_recorder

1 file changed

+149
-172
lines changed
Lines changed: 149 additions & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -1,215 +1,192 @@
1+
"""Minimal Human Trace Agent (<200 lines)
2+
3+
Per step we capture ONLY:
4+
- axtree_txt, pruned_html, actions.json, screenshot.png, after.html
5+
- Auto-resume after detecting user action
6+
- Visible recording indicator
7+
"""
8+
19
from __future__ import annotations
210

3-
import logging
4-
import textwrap
11+
import json
12+
import time
13+
import zipfile
514
from dataclasses import dataclass
15+
from pathlib import Path
616

717
import bgym
818
from playwright.sync_api import Page
919

1020
from agentlab.agents.agent_args import AgentArgs
11-
12-
logger = logging.getLogger(__name__)
13-
14-
15-
# ---------------------------------------------------------------------------
16-
# Simplified variant: capture human step (trace + screenshot + html) only
17-
# ---------------------------------------------------------------------------
21+
from browsergym.utils.obs import flatten_axtree_to_str, flatten_dom_to_str, prune_html
1822

1923

2024
@dataclass
class HumanTraceAgentArgs(AgentArgs):
    """Configuration record that builds a ``HumanTraceAgent``."""

    # Identifier under which this agent is reported.
    agent_name: str = "HumanTraceAgent"
    # Root directory handed to the agent for its per-step artifacts.
    trace_dir: str = "human_traces"
    # The agent raises if it never receives a Playwright page, so this is on
    # by default.
    use_raw_page_output: bool = True

    def make_agent(self) -> bgym.Agent:  # type: ignore[override]
        """Instantiate the recorder agent rooted at ``trace_dir``."""
        recorder = HumanTraceAgent(self.trace_dir)
        return recorder

    def set_reproducibility_mode(self):
        """Intentionally a no-op."""
4735

4836

49-
class SimpleHumanTraceCaptureAgent(bgym.Agent):
50-
"""Minimal human-in-the-loop recorder.
51-
52-
On each get_action:
53-
1. Start a Playwright tracing capture (if not already running for this step).
54-
2. Call page.pause() to open Inspector; user performs EXACTLY one logical action.
55-
3. Stop tracing, save trace zip, screenshot (after action), and HTML snapshot.
56-
4. Return noop() so the environment advances.
57-
58-
Artifacts are stored under trace_dir/step_<n>/
59-
"""
60-
61-
def __init__(self, trace_dir: str, screenshots: bool, snapshots: bool, sources: bool):
37+
class HumanTraceAgent(bgym.Agent):
38+
def __init__(self, trace_dir: str):
    """Prepare the recorder and make sure its output root exists.

    Args:
        trace_dir: Directory under which all recorded artifacts are stored.
            It is created (including parents) if it does not exist yet.
    """
    self.action_set = bgym.HighLevelActionSet(["bid"], multiaction=False)

    output_root = Path(trace_dir)
    output_root.mkdir(parents=True, exist_ok=True)
    self._root = output_root

    # Filled in lazily by obs_preprocessor from the first observation.
    self._page: Page | None = None
    self._task_name = self._seed = None

    # Index of the next step to record.
    self._step = 0
46+
47+
def obs_preprocessor(self, obs: dict):  # type: ignore[override]
    """Cache episode metadata and replace raw objects with text renderings.

    On a dict observation this:
      * remembers ``obs["page"]`` the first time it is seen;
      * remembers the task name and seed the first time they are seen,
        looking at the top level first and then inside ``obs["task"]``;
      * adds ``axtree_txt`` / ``pruned_html`` derived from ``axtree_object`` /
        ``dom_object`` when those are present;
      * removes ``dom_object``, ``axtree_object`` and ``page`` from ``obs``.

    Args:
        obs: Observation from the environment. It is modified in place.
            Non-dict values are returned untouched.

    Returns:
        The same ``obs`` object.
    """
    if not isinstance(obs, dict):
        return obs

    if self._page is None and "page" in obs:
        self._page = obs["page"]

    def lookup(key: str, fallback: str):
        """Return obs[key], else obs["task"][key], else ``fallback``."""
        nested = obs.get("task")
        # obs["task"] is only consulted when it really is a mapping; calling
        # .get() on any other type used to raise AttributeError.
        sources = (obs, nested) if isinstance(nested, dict) else (obs,)
        for source in sources:
            value = source.get(key)
            # Only None and "" count as missing. A plain truthiness test
            # would discard seed 0, which is a perfectly valid seed.
            if value is not None and value != "":
                return value
        return fallback

    if self._task_name is None:
        self._task_name = lookup("task_name", "unknown_task")
    if self._seed is None:
        self._seed = lookup("seed", "unknown_seed")

    axtree = obs.get("axtree_object")
    if axtree is not None:
        try:
            obs["axtree_txt"] = flatten_axtree_to_str(axtree)
        except Exception as exc:
            # Best effort: the step is still recorded without the text.
            print(f"[HumanTrace][WARN] axtree flattening failed: {exc}")

    dom = obs.get("dom_object")
    if dom is not None:
        try:
            obs["pruned_html"] = prune_html(flatten_dom_to_str(dom))
        except Exception as exc:
            print(f"[HumanTrace][WARN] DOM flattening failed: {exc}")

    for key in ("dom_object", "axtree_object", "page"):
        obs.pop(key, None)
    return obs
8375

8476
def get_action(self, obs: dict):  # type: ignore[override]
    """Record one human action in the browser and return ``noop()``.

    For the current step this shows a "REC" badge in the page, records a
    Playwright trace chunk while ``self._wait_for_action`` blocks, dumps the
    chunk's events to ``actions.json``, then saves ``screenshot.png`` and
    ``after.html``. Artifacts go to
    ``<trace_dir>/<task_name>/<seed>/step_XXXX``.

    Every capture stage is best effort: a failure is reported on stdout and
    the remaining stages still run.

    Args:
        obs: Current observation. Unused here; the page and the episode
            metadata were cached by ``obs_preprocessor``.

    Returns:
        A tuple ``("noop()", info)`` where ``info["extra_info"]`` holds the
        step index, task name, seed and the step directory.

    Raises:
        RuntimeError: If no Playwright page has been cached.
    """
    if self._page is None:
        raise RuntimeError("Playwright Page missing; ensure use_raw_page_output=True")

    page = self._page

    def warn(stage: str, exc: Exception) -> None:
        print(f"[HumanTrace][WARN] {stage} failed: {exc}")

    # Create directory structure: trace_dir/task_name/seed/step_XXXX.
    # Only None / "" fall back to the placeholder; seed 0 must map to "0".
    task_label = "unknown_task" if self._task_name in (None, "") else str(self._task_name)
    seed_label = "unknown_seed" if self._seed in (None, "") else str(self._seed)
    step_dir = self._root / task_label / seed_label / f"step_{self._step:04d}"
    step_dir.mkdir(parents=True, exist_ok=True)

    trace_path = step_dir / "temp_trace.zip"
    actions_path = step_dir / "actions.json"

    print(
        f"[HumanTrace] Task: {self._task_name}, Seed: {self._seed}, Step {self._step}: Perform ONE action"
    )

    # Small recording indicator. Purely cosmetic, so it must not abort the step.
    try:
        page.evaluate(
            """
            const div = document.createElement('div');
            div.id = '__rec';
            div.innerHTML = '🔴 REC';
            div.style.cssText = 'position:fixed;top:5px;right:5px;background:#f44;color:#fff;padding:5px 8px;border-radius:4px;font:bold 12px monospace;z-index:99999';
            document.body.appendChild(div);
            """
        )
    except Exception as exc:
        warn("recording indicator", exc)

    # Tracing is started once and then split into one chunk per step.
    # Starting it again on every step raises, which previously skipped
    # start_chunk() and left every step after the first without actions.json.
    if not getattr(self, "_tracing_started", False):
        try:
            page.context.tracing.start(screenshots=True, snapshots=True)
            self._tracing_started = True
        except Exception as exc:
            # Possibly already started elsewhere; start_chunk() is still tried.
            warn("tracing.start", exc)

    chunk_open = False
    try:
        page.context.tracing.start_chunk()
        chunk_open = True
    except Exception as exc:
        warn("tracing.start_chunk", exc)

    # Wait for action
    self._wait_for_action(page)

    # Stop tracing and save
    if chunk_open:
        try:
            page.context.tracing.stop_chunk(path=str(trace_path))
            actions = self._extract_trace(str(trace_path))
            actions_path.write_text(json.dumps(actions, indent=2), encoding="utf-8")
        except Exception as exc:
            warn("trace export", exc)
        finally:
            # The zip is only an intermediate; drop it even if export failed.
            trace_path.unlink(missing_ok=True)

    # Remove indicator. The page may be mid-navigation after the action.
    try:
        page.evaluate("document.getElementById('__rec')?.remove()")
    except Exception as exc:
        warn("indicator removal", exc)

    # Save screenshot
    try:
        page.screenshot(path=str(step_dir / "screenshot.png"))
    except Exception as exc:
        warn("screenshot", exc)

    # Save HTML. Explicit UTF-8 so non-ASCII pages do not depend on the
    # platform default encoding.
    try:
        (step_dir / "after.html").write_text(page.content(), encoding="utf-8")
    except Exception as exc:
        warn("html capture", exc)

    recorded_step = self._step
    self._step += 1
    return "noop()", {
        "extra_info": {
            "step": recorded_step,
            "task_name": self._task_name,
            "seed": self._seed,
            "trace_dir": str(step_dir),
        }
    }
153-
(step_dir / "meta.json").write_text(json.dumps(meta, indent=2))
154149

155-
# --- Derive a lightweight human-readable script summary from the trace ---
156-
script_summary_lines: list[str] = []
157-
try:
158-
import json as _json
159-
import zipfile
160-
161-
with zipfile.ZipFile(trace_path, "r") as zf:
162-
# Playwright trace usually contains one or more *.trace files (jsonl)
163-
trace_files = [n for n in zf.namelist() if n.endswith(".trace")]
164-
for tf in trace_files:
165-
with zf.open(tf, "r") as fh:
166-
for raw_line in fh:
167-
try:
168-
evt = _json.loads(raw_line.decode("utf-8"))
169-
except Exception:
170-
continue
171-
if evt.get("type") != "action":
172-
continue
173-
a = evt.get("action", {})
174-
api_name = a.get("apiName") or a.get("name") or "action"
175-
selector = a.get("selector") or a.get("locator") or ""
176-
value = a.get("value") or a.get("text") or ""
177-
line = f"{api_name}"
178-
if selector:
179-
line += f" selector={selector!r}"
180-
if value and isinstance(value, str) and len(value) < 200:
181-
line += f" value={value!r}"
182-
script_summary_lines.append(line)
183-
if not script_summary_lines:
184-
script_summary_lines.append("(no action events parsed from trace chunk)")
185-
except Exception as e: # pragma: no cover
186-
script_summary_lines.append(f"(failed to parse trace for script summary: {e})")
187-
188-
# Prepare chat messages (simple list of strings for easy viewing)
189-
chat_messages = [
190-
"PLAYWRIGHT TRACE STEP SUMMARY:",
191-
f"Step {self._step_idx} URL: {page.url}",
192-
"Actions:",
193-
*script_summary_lines,
194-
f"Trace file: {trace_path}",
195-
"Open with: npx playwright show-trace " + str(trace_path),
196-
]
197-
198-
self._step_idx += 1
199-
200-
agent_info = bgym.AgentInfo(
201-
think="human-recorded",
202-
chat_messages=chat_messages,
203-
stats={"step": self._step_idx},
204-
markdown_page=textwrap.dedent(
205-
f"""### Simple Human Trace Capture\nSaved artifacts for step {meta['step']}:\n- URL: {meta['url']}\n- Trace: {meta['trace_path']}\n- Screenshot: {meta['screenshot_path']}\n- HTML: {meta['html_path']}\n"""
206-
),
207-
extra_info=meta,
150+
def _wait_for_action(
    self,
    page,
    timeout: float = 300.0,
    poll_interval: float = 0.1,
    settle_delay: float = 0.3,
) -> bool:
    """Block until the user interacts with the page, then auto-resume.

    A ``window.__acted`` flag is reset in the page and capture-phase
    listeners for click / keydown / input / change set it. The flag is then
    polled from Python.

    Args:
        page: Object exposing Playwright's ``Page.evaluate``.
        timeout: Maximum number of seconds to wait (default: 5 minutes).
        poll_interval: Seconds to sleep between two polls of the flag.
        settle_delay: Seconds to sleep after an action was detected, while
            the indicator shows "SAVING".

    Returns:
        True if an action (or a document change, see below) was detected,
        False if ``timeout`` elapsed first.
    """
    # Reset the flag every step, but register the listeners only once per
    # document so they do not pile up across steps.
    page.evaluate(
        """
        window.__acted = false;
        if (!window.__actedHooked) {
            window.__actedHooked = true;
            ['click','keydown','input','change'].forEach(e =>
                document.addEventListener(e, () => window.__acted = true, true)
            );
        }
        """
    )

    # Monotonic clock: the timeout must not be affected by wall-clock jumps.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            acted = page.evaluate("window.__acted")
        except Exception:
            # Presumably a navigation in flight; just poll again.
            acted = False

        # None means the flag is undefined: the document in which it was set
        # to false has been replaced, e.g. because the action navigated away.
        # Treating that as "acted" avoids waiting out the whole timeout.
        if acted is None or acted:
            try:
                # Null-safe and isolated from the flag read: a missing badge
                # must not keep the loop spinning after the user has acted.
                page.evaluate(
                    "(el => { if (el) el.innerHTML = '💾 SAVING'; })"
                    "(document.getElementById('__rec'))"
                )
            except Exception:
                pass
            if settle_delay > 0:
                time.sleep(settle_delay)
            return True

        time.sleep(poll_interval)

    return False
171+
172+
def _extract_trace(self, trace_file: str):
173+
"""Extract ALL events from trace zip."""
174+
all_events = []
175+
try:
176+
with zipfile.ZipFile(trace_file, "r") as zf:
177+
for name in zf.namelist():
178+
if name.endswith(".trace"):
179+
with zf.open(name) as f:
180+
for line in f:
181+
try:
182+
event = json.loads(line.decode())
183+
# Save everything - don't filter
184+
all_events.append(event)
185+
except Exception:
186+
continue
187+
except Exception:
188+
pass
189+
return all_events
190+
191+
192+
HUMAN_TRACE_AGENT = HumanTraceAgentArgs()

0 commit comments

Comments
 (0)