Skip to content

Commit 8f2e4e1

Browse files
authored
Merge pull request #7 from gaussian/develop
Add structured logging, action timeout, and LLM retry dedup
2 parents 40a9043 + b99ba6f commit 8f2e4e1

File tree

15 files changed

+781
-197
lines changed

15 files changed

+781
-197
lines changed

.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
OPENAI_API_KEY=sk-...

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ marimo/_static/
206206
marimo/_lsp/
207207
__marimo__/
208208

209-
# shots outputs
209+
# shots config & outputs
210+
shots.yaml
210211
shots_out/
211212
report.json
212213
storage_state.json

AGENTS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Agents
2+
3+
## Git
4+
5+
- Always stage and commit in a single command: `git add file1 file2 && git commit -m "message"`
6+
- Run git commands from the working directory directly — no `cd` or `-C` flags

CLAUDE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
AGENTS.md

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,15 @@ pip install -e ".[llm,yaml]"
1414
playwright install chromium
1515
```
1616

17+
## Setup
18+
19+
```bash
20+
cp shots.yaml.example shots.yaml # edit with your app's URL and shots
21+
cp .env.example .env # add your OPENAI_API_KEY
22+
```
23+
24+
Both `shots.yaml` and `.env` are gitignored.
25+
1726
## 1) One-time manual login
1827

1928
```bash

pyproject.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
[build-system]
2+
requires = ["hatchling"]
3+
build-backend = "hatchling.build"
4+
5+
[tool.uv]
6+
package = true
7+
18
[project]
29
name = "shots"
310
version = "0.1.0"

shots.yaml renamed to shots.yaml.example

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ shots:
1212
description: >
1313
Capture the main dashboard with KPI cards and a chart visible.
1414
Close any modal, cookie banner, or tour overlay. If needed, use the left nav to reach Dashboard.
15-
- id: wizard-step-4
15+
16+
- id: integrations
1617
description: >
17-
Open the onboarding wizard and click the red button 3 times to reach step 4 summary,
18-
then stop when the summary screen is visible and presentable.
18+
Show Settings -> Integrations page listing available integrations.
1919
viewport_preset: laptop

shots/cli.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,15 @@
1010

1111

1212
def _add_common_run_flags(p: argparse.ArgumentParser) -> None:
13-
p.add_argument("--out-dir", default="shots_out")
13+
p.add_argument("--out-dir", default=None, help="Output directory (overrides config out_dir, default: shots_out).")
1414
p.add_argument("--headed", action="store_true", help="Show the browser (debug).")
15-
p.add_argument("--timeout-ms", type=int, default=45_000)
15+
p.add_argument("--timeout-ms", type=int, default=10_000, help="Page-load/navigation timeout.")
16+
p.add_argument("--action-timeout-ms", type=int, default=5_000, help="Timeout for clicks/typing (fail fast).")
1617

1718
p.add_argument("--use-llm", action="store_true", help="Enable LLM multi-step navigation to acquire each shot.")
18-
p.add_argument("--model", default="gpt-4.1")
19+
p.add_argument("--model", default="gpt-5.2")
1920
p.add_argument("--use-llm-crop", action="store_true", help="Use LLM to choose a crop box.")
21+
p.add_argument("--max-crop-retries", type=int, default=2, help="Max crop validation retries (default: 2).")
2022
p.add_argument("--save-source", action="store_true", help="Save uncropped source images too.")
2123

2224

@@ -77,7 +79,9 @@ def cmd_login(args) -> None:
7779

7880
def cmd_run_config(args) -> None:
7981
cfg = load_config(args.config)
80-
out_dir = pathlib.Path(args.out_dir).resolve()
82+
# CLI --out-dir overrides config out_dir
83+
out_dir_str = args.out_dir if args.out_dir is not None else cfg.out_dir
84+
out_dir = pathlib.Path(out_dir_str).resolve()
8185

8286
w, h, scale, full_page = _resolve_cli_viewport(args)
8387
fallback = viewport_from_values(w, h, scale, full_page=full_page)
@@ -86,10 +90,12 @@ def cmd_run_config(args) -> None:
8690
cfg=cfg,
8791
out_dir=out_dir,
8892
timeout_ms=args.timeout_ms,
93+
action_timeout_ms=args.action_timeout_ms,
8994
headed=args.headed,
9095
use_llm=args.use_llm,
9196
model=args.model,
9297
use_llm_crop=args.use_llm_crop,
98+
max_crop_retries=args.max_crop_retries,
9399
save_source=args.save_source,
94100
cli_fallback_viewport=fallback,
95101
)

shots/config.py

Lines changed: 91 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,26 @@ class ShotSpec:
1919
viewport_preset: str | None = None
2020
viewport: dict[str, int] | None = None # width/height/scale
2121
full_page: bool | None = None
22+
label: str | None = None # per-shot label override
23+
24+
25+
@dataclass
class ShotGroup:
    """A named collection of shots that share output and label settings."""

    id: str
    shots: list[ShotSpec]
    # Output format for the group: "png" or "pdf".
    output: str = "png"
    # Template string applied to all shots in the group.
    label: str | None = None
    # When true, a date/time line is added below the label.
    label_date: bool = False
    # Subfolder name override; defaults to the group id when unset.
    folder: str | None = None
2233

2334

2435
@dataclass
2536
class RunConfig:
2637
base_url: str
2738
start: str
2839
defaults: dict[str, Any]
29-
shots: list[ShotSpec]
40+
groups: list[ShotGroup]
41+
out_dir: str = "shots_out"
3042

3143

3244
def _require_str(obj: dict[str, Any], key: str) -> str:
@@ -35,6 +47,28 @@ def _require_str(obj: dict[str, Any], key: str) -> str:
3547
return obj[key].strip()
3648

3749

50+
def _parse_shot(s: dict[str, Any], ctx: str) -> ShotSpec:
    """Parse a single shot mapping into a ShotSpec.

    Args:
        s: Raw shot entry from the config. ``id`` and ``description`` are
            read through ``_require_str``; ``url``, ``viewport_preset``,
            ``viewport``, ``full_page`` and ``label`` are optional.
        ctx: Config path of this entry (e.g. ``shots[0]``), used as the
            prefix of validation error messages.

    Returns:
        The parsed ShotSpec. Optional keys that are absent, empty, or null
        are stored as None.

    Raises:
        ValueError: If ``s`` or ``viewport`` is not a mapping, or a viewport
            value cannot be converted to an integer.
    """
    if not isinstance(s, dict):
        raise ValueError(f"{ctx} must be an object.")
    sid = _require_str(s, "id")
    desc = _require_str(s, "description")

    viewport_raw = s.get("viewport")
    if viewport_raw is not None and not isinstance(viewport_raw, dict):
        raise ValueError(f"{ctx}.viewport must be an object if provided.")

    viewport: dict[str, int] | None = None
    if viewport_raw:
        try:
            viewport = {k: int(v) for k, v in viewport_raw.items()}
        except (TypeError, ValueError) as exc:
            # Report the config path instead of a bare int() failure.
            raise ValueError(f"{ctx}.viewport values must be integers.") from exc

    # An explicit null means "not set": keep None rather than coercing it to
    # False, which would read as a deliberate full_page=False.
    full_page_raw = s.get("full_page")
    full_page = None if full_page_raw is None else bool(full_page_raw)

    # A literal backslash-n typed in the config becomes a real line break.
    label = str(s["label"]).strip().replace("\\n", "\n") if s.get("label") else None

    return ShotSpec(
        id=sid,
        description=desc,
        url=str(s["url"]).strip() if s.get("url") else None,
        viewport_preset=str(s["viewport_preset"]).strip() if s.get("viewport_preset") else None,
        viewport=viewport,
        full_page=full_page,
        label=label,
    )
70+
71+
3872
def load_config(path: str) -> RunConfig:
3973
p = pathlib.Path(path).resolve()
4074
raw_text = p.read_text(encoding="utf-8")
@@ -51,34 +85,63 @@ def load_config(path: str) -> RunConfig:
5185

5286
base_url = _require_str(data, "base_url").rstrip("/")
5387
start = str(data.get("start", "/")).strip() or "/"
88+
out_dir = str(data.get("out_dir", "shots_out")).strip() or "shots_out"
5489
defaults = data.get("defaults", {}) or {}
5590
if not isinstance(defaults, dict):
5691
raise ValueError("defaults must be an object.")
5792

58-
shots_raw = data.get("shots", [])
59-
if not isinstance(shots_raw, list) or not shots_raw:
60-
raise ValueError("shots must be a non-empty list.")
61-
62-
shots: list[ShotSpec] = []
63-
for idx, s in enumerate(shots_raw):
64-
if not isinstance(s, dict):
65-
raise ValueError(f"shots[{idx}] must be an object.")
66-
sid = _require_str(s, "id")
67-
desc = _require_str(s, "description")
68-
69-
viewport = s.get("viewport")
70-
if viewport is not None and not isinstance(viewport, dict):
71-
raise ValueError(f"shots[{idx}].viewport must be an object if provided.")
72-
73-
shots.append(
74-
ShotSpec(
75-
id=sid,
76-
description=desc,
77-
url=str(s["url"]).strip() if s.get("url") else None,
78-
viewport_preset=str(s["viewport_preset"]).strip() if s.get("viewport_preset") else None,
79-
viewport={k: int(v) for k, v in viewport.items()} if viewport else None,
80-
full_page=bool(s["full_page"]) if "full_page" in s else None,
81-
)
82-
)
83-
84-
return RunConfig(base_url=base_url, start=start, defaults=defaults, shots=shots)
93+
has_groups = "groups" in data
94+
has_shots = "shots" in data
95+
96+
if has_groups and has_shots:
97+
raise ValueError("Config cannot have both 'groups' and 'shots'. Use one or the other.")
98+
if not has_groups and not has_shots:
99+
raise ValueError("Config must have either 'groups' or 'shots'.")
100+
101+
groups: list[ShotGroup] = []
102+
103+
if has_groups:
104+
groups_raw = data["groups"]
105+
if not isinstance(groups_raw, list) or not groups_raw:
106+
raise ValueError("groups must be a non-empty list.")
107+
108+
for gi, g in enumerate(groups_raw):
109+
if not isinstance(g, dict):
110+
raise ValueError(f"groups[{gi}] must be an object.")
111+
gid = _require_str(g, "id")
112+
output = str(g.get("output", "png")).strip().lower()
113+
if output not in ("png", "pdf"):
114+
raise ValueError(f"groups[{gi}].output must be 'png' or 'pdf', got '{output}'.")
115+
116+
shots_raw = g.get("shots", [])
117+
if not isinstance(shots_raw, list) or not shots_raw:
118+
raise ValueError(f"groups[{gi}].shots must be a non-empty list.")
119+
120+
shots = [_parse_shot(s, f"groups[{gi}].shots[{si}]") for si, s in enumerate(shots_raw)]
121+
122+
if output == "png" and len(shots) > 1:
123+
raise ValueError(
124+
f"groups[{gi}] ('{gid}'): output='png' requires exactly 1 shot, got {len(shots)}. "
125+
"Use output='pdf' for multi-shot groups."
126+
)
127+
128+
groups.append(ShotGroup(
129+
id=gid,
130+
shots=shots,
131+
output=output,
132+
label=str(g["label"]).strip().replace("\\n", "\n") if g.get("label") else None,
133+
label_date=bool(g.get("label_date", False)),
134+
folder=str(g["folder"]).strip() if g.get("folder") else None,
135+
))
136+
137+
else:
138+
# Flat shots list — auto-wrap each into its own group
139+
shots_raw = data["shots"]
140+
if not isinstance(shots_raw, list) or not shots_raw:
141+
raise ValueError("shots must be a non-empty list.")
142+
143+
for si, s in enumerate(shots_raw):
144+
shot = _parse_shot(s, f"shots[{si}]")
145+
groups.append(ShotGroup(id=shot.id, shots=[shot]))
146+
147+
return RunConfig(base_url=base_url, start=start, defaults=defaults, groups=groups, out_dir=out_dir)

shots/image_ops.py

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from dataclasses import dataclass
55
from io import BytesIO
66

7-
from PIL import Image
7+
from PIL import Image, ImageDraw, ImageFont
88

99

1010
def b64_png(png_bytes: bytes) -> str:
@@ -64,3 +64,81 @@ def crop_png(png_bytes: bytes, crop: Crop) -> bytes:
6464
def get_png_size(png_bytes: bytes) -> tuple[int, int]:
6565
im = Image.open(BytesIO(png_bytes))
6666
return im.size
67+
68+
69+
def _get_font(size: int) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    """Return a sans-serif font at ``size``, falling back to Pillow's default.

    Tries a fixed list of common macOS/Linux font paths and bare font file
    names in order and returns the first one that loads.

    Args:
        size: Requested font size, passed to ``ImageFont.truetype``.

    Returns:
        The first candidate font that loads, otherwise Pillow's built-in
        default font.
    """
    candidates = (
        "/System/Library/Fonts/Helvetica.ttc",
        "/System/Library/Fonts/SFNSText.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/TTF/DejaVuSans.ttf",
        "arial.ttf",
        "DejaVuSans.ttf",
    )
    for path in candidates:
        try:
            return ImageFont.truetype(path, size)
        except OSError:  # IOError is an alias of OSError
            continue
    try:
        return ImageFont.load_default(size=size)
    except TypeError:
        # Pillow releases before 10.1 do not accept ``size``; use the
        # fixed-size built-in font rather than failing outright.
        return ImageFont.load_default()
85+
86+
87+
def add_label_banner(png_bytes: bytes, label_text: str, font_size: int = 32) -> bytes:
    """Append a white banner with centered black text below an image.

    The canvas is extended downward; the screenshot itself is not cropped or
    covered. A black separator line is drawn between image and banner.
    Multiline text (e.g. label + date on separate lines) is supported.

    Args:
        png_bytes: Encoded source image.
        label_text: Text to draw; may contain newlines.
        font_size: Font size; also sets the padding above and below the text
            (``font_size``) and the extra gap between lines
            (``font_size // 2``).

    Returns:
        PNG-encoded bytes of the image with the banner appended.
    """
    im = Image.open(BytesIO(png_bytes)).convert("RGBA")
    w, h = im.size

    font = _get_font(font_size)
    line_thickness = 2
    padding = font_size  # gap above and below text block
    line_spacing = font_size // 2  # extra gap between lines

    # Measure the text block once; the result does not depend on the canvas.
    left, top, right, bottom = ImageDraw.Draw(im).multiline_textbbox(
        (0, 0), label_text, font=font, spacing=line_spacing
    )
    text_w = right - left
    text_block_h = bottom - top

    banner_h = line_thickness + 2 * padding + text_block_h

    # New canvas: original + separator + banner
    out = Image.new("RGBA", (w, h + banner_h), (255, 255, 255, 255))
    out.paste(im, (0, 0))
    draw = ImageDraw.Draw(out)

    # rectangle() includes both corner points, so subtract 1 to get a
    # separator exactly ``line_thickness`` rows high.
    draw.rectangle([(0, h), (w - 1, h + line_thickness - 1)], fill=(0, 0, 0, 255))

    # Subtract the bbox origin so the drawn text lands exactly inside the
    # padded area, whatever the font's bearing offsets are.
    text_x = (w - text_w) // 2 - left
    text_y = h + line_thickness + padding - top
    draw.multiline_text(
        (text_x, text_y),
        label_text,
        fill=(0, 0, 0, 255),
        font=font,
        align="center",
        spacing=line_spacing,
    )

    buf = BytesIO()
    out.save(buf, format="PNG")
    return buf.getvalue()
127+
128+
129+
def pngs_to_pdf(png_bytes_list: list[bytes]) -> bytes:
    """Combine PNG images into a single PDF, one image per page.

    Uses Pillow's built-in PDF support — no extra dependencies.

    Args:
        png_bytes_list: Encoded images, in page order. Must not be empty.

    Returns:
        The PDF document as bytes.

    Raises:
        ValueError: If ``png_bytes_list`` is empty.
    """
    if not png_bytes_list:
        # Fail with a clear message instead of an IndexError further down.
        raise ValueError("pngs_to_pdf requires at least one PNG image.")

    # Each image is converted to RGB before being written as a PDF page.
    first, *rest = (Image.open(BytesIO(data)).convert("RGB") for data in png_bytes_list)

    buf = BytesIO()
    # With an empty ``append_images`` this writes a single-page PDF.
    first.save(buf, format="PDF", save_all=True, append_images=rest)
    return buf.getvalue()

0 commit comments

Comments
 (0)