Skip to content

Commit 6c169dc

Browse files
authored
chromium headless unikernel (#8)
* chromium headless unikernel * updates * test script * move things around * fix * create inst * create a kernel user and run chromium as that; hardcode chromium flags :/ kernel panic if the list of args is too long ``` [ 0.023198] Kernel panic - not syncing: Too many boot init vars at `/usr/bin/wrapper.sh' ``` * cleanup * update readme * script to check memory usage of the unikraft instance * lots of updates * show network bytes
1 parent 2c9d6bf commit 6c169dc

File tree

20 files changed

+1159
-0
lines changed

20 files changed

+1159
-0
lines changed

shared/cdp-test/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
screenshot.png
2+
screenshot-before.png

shared/cdp-test/.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.11

shared/cdp-test/main.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
import sys
2+
import asyncio
3+
import json
4+
import re
5+
import socket
6+
from pathlib import Path
7+
from urllib.parse import urljoin, urlparse
8+
from urllib.request import urlopen, Request
9+
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError # type: ignore
10+
import aiohttp # type: ignore
11+
import contextlib
12+
13+
async def run(cdp_url: str) -> None:
    """Connect to an existing Chromium instance via CDP, navigate, and screenshot.

    Writes ``screenshot-before.png`` (page state right after attaching) and
    ``screenshot.png`` (page state after navigation) to the current working
    directory. If the navigation times out, a best-effort
    ``screenshot-error.png`` is attempted before the timeout is re-raised.

    Args:
        cdp_url: Endpoint handed to Playwright's ``connect_over_cdp``.

    Raises:
        PlaywrightTimeoutError: If the page does not reach
            ``domcontentloaded`` within 60 s.
    """
    async with async_playwright() as p:
        # Connect to the running browser exposed via the CDP websocket URL.
        browser = await p.chromium.connect_over_cdp(cdp_url)
        try:
            # Re-use the first context if present, otherwise create a fresh one.
            if browser.contexts:
                context = browser.contexts[0]
            else:
                context = await browser.new_context()

            # Re-use the first page if present, otherwise create a fresh one.
            page = context.pages[0] if context.pages else await context.new_page()

            # Snapshot the page as-is for debugging purposes.
            print(f"Page URL: {page.url}")
            print("Taking screenshot before navigation")
            await page.screenshot(path="screenshot-before.png", full_page=True)

            # Pick a destination that differs from the current page so that
            # every run performs a real navigation.
            target_url = (
                "https://www.apple.com"
                if "apple.com" not in page.url
                else "https://www.microsoft.com"
            )

            print(f"Navigating to {target_url} …", file=sys.stderr)

            try:
                # First wait only for DOMContentLoaded – many modern sites keep
                # long-polling connections alive which makes the stricter
                # "networkidle" heuristic unreliable.
                await page.goto(target_url, wait_until="domcontentloaded", timeout=60_000)

                # Optionally wait for a quieter network but don't fail if it
                # never settles.
                try:
                    await page.wait_for_load_state("networkidle", timeout=10_000)
                except PlaywrightTimeoutError:
                    print("networkidle state not reached within 10 s – proceeding", file=sys.stderr)

            except PlaywrightTimeoutError:
                print(f"Navigation to {target_url} timed out after 60 s", file=sys.stderr)
                # Capture the state for post-mortem analysis. This is best
                # effort: a failing screenshot must not hide the navigation
                # timeout, which is the error the caller needs to see.
                try:
                    await page.screenshot(path="screenshot-error.png", full_page=True)
                except Exception as shot_exc:  # noqa: BLE001
                    print(f"Could not capture screenshot-error.png: {shot_exc}", file=sys.stderr)
                raise

            # Save the post-navigation screenshot into the working directory.
            out_path = Path("screenshot.png")
            await page.screenshot(path=str(out_path), full_page=True)
            print(f"Screenshot saved to {out_path.resolve()}")
        finally:
            # Always release the CDP connection, including on failure paths.
            await browser.close()
65+
66+
67+
# ---------------- CLI entrypoint ---------------- #
68+
69+
def _resolve_cdp_url(arg: str) -> str:
    """Resolve the provided argument to a CDP websocket URL.

    If *arg* is already a ``ws://`` or ``wss://`` URL it is returned
    unchanged. Otherwise it is treated as a DevTools HTTP endpoint (e.g.
    ``http://localhost:9222`` or just ``localhost:9222``): ``/json/version``
    is fetched and its ``webSocketDebuggerUrl`` is returned, upgraded to
    ``wss://`` for https endpoints and with the caller-supplied hostname
    restored in place of the address used for the ``Host`` header.

    Args:
        arg: Websocket URL, HTTP(S) URL, or bare ``host[:port]``.

    Returns:
        The websocket URL to connect to.

    Raises:
        SystemExit: With exit code 1 (after printing the reason to stderr)
            if the endpoint cannot be queried or the response does not
            contain ``webSocketDebuggerUrl``.
    """
    # Function-scope import: ``ipaddress`` is not among the module imports.
    import ipaddress

    # Honour the documented contract: a websocket URL needs no discovery.
    if arg.startswith(("ws://", "wss://")):
        return arg

    # Ensure scheme. Default to http:// if none supplied.
    if not arg.startswith(("http://", "https://")):
        arg = f"http://{arg}"

    version_url = urljoin(arg.rstrip("/") + "/", "json/version")
    try:
        # Chromium devtools HTTP server only accepts Host headers that are an
        # IP literal or "localhost". If the caller passed a hostname, resolve
        # it to an IP so that the request is not rejected.
        parsed = urlparse(version_url)
        raw_host = parsed.hostname or "localhost"

        # Use the stdlib parser rather than a regex: a pattern such as
        # ``[0-9a-fA-F:]+`` also matches all-hex hostnames like "cafe".
        try:
            literal = ipaddress.ip_address(raw_host)
        except ValueError:
            literal = None

        if literal is None and raw_host != "localhost":
            try:
                raw_host = socket.gethostbyname(raw_host)
            except Exception:  # noqa: BLE001
                # Fall back to localhost if resolution fails; devtools handler
                # will at least accept it rather than closing the connection.
                raw_host = "localhost"

        # ``urlparse`` strips the brackets from IPv6 literals; they must be
        # restored in a Host header, otherwise the port is ambiguous.
        is_ipv6 = literal is not None and literal.version == 6
        host_header = f"[{raw_host}]" if is_ipv6 else raw_host
        if parsed.port:
            host_header = f"{host_header}:{parsed.port}"
        print(f"Host header: {host_header}")

        req = Request(version_url, headers={"Host": host_header})
        # Bounded wait so an unresponsive endpoint cannot hang the CLI.
        with urlopen(req, timeout=10) as resp:
            data = json.load(resp)
        print(f"Data: {data}")

        ws_url = data["webSocketDebuggerUrl"]
        # Change ws:// to wss:// if the endpoint was reached over https.
        if parsed.scheme == "https" and ws_url.startswith("ws://"):
            ws_url = "wss://" + ws_url.removeprefix("ws://")
        # Change the IP back to the hostname the caller supplied. Only the
        # first occurrence (the authority) is rewritten, and the step is
        # skipped when the URL carried no hostname at all.
        if parsed.hostname:
            ws_url = ws_url.replace(raw_host, parsed.hostname, 1)
        print(f"debugger url: {ws_url}")
        return ws_url
    except Exception as exc:  # noqa: BLE001
        print(
            f"Failed to retrieve webSocketDebuggerUrl from {version_url}: {exc}",
            file=sys.stderr,
        )
        sys.exit(1)
119+
120+
# ---------------- keep-alive task ---------------- #
121+
122+
123+
async def _keep_alive(endpoint: str) -> None:
    """Periodically send a GET request to *endpoint* to keep the instance alive.

    Loops forever, issuing one request and then sleeping for one second; the
    caller is expected to cancel the task. Request failures are reported on
    stderr and do not stop the loop.

    Args:
        endpoint: HTTP(S) URL, websocket URL, or bare ``host[:port]``.
    """
    # A websocket URL cannot be fetched with a plain GET; map it onto the
    # matching HTTP scheme instead of letting it become "http://ws://...".
    if endpoint.startswith("ws://"):
        endpoint = "http://" + endpoint.removeprefix("ws://")
    elif endpoint.startswith("wss://"):
        endpoint = "https://" + endpoint.removeprefix("wss://")
    elif not endpoint.startswith(("http://", "https://")):
        # Ensure scheme; default to http:// if missing.
        endpoint = f"http://{endpoint}"

    async with aiohttp.ClientSession() as session:
        while True:
            try:
                async with session.get(endpoint) as resp:
                    # Consume the response body to finish the request.
                    await resp.read()
            except Exception as exc:  # noqa: BLE001
                # Best effort: report and retry on the next tick.
                print(f"Keep-alive request to {endpoint} failed: {exc}", file=sys.stderr)

            await asyncio.sleep(1)
139+
140+
141+
async def _async_main(endpoint_arg: str) -> None:
    """Drive one automation run against the endpoint named by *endpoint_arg*.

    The CDP URL is resolved first; a background keep-alive task then pings
    the endpoint for as long as ``run`` is executing and is cancelled
    afterwards, whether ``run`` succeeded or raised.
    """
    websocket_url = _resolve_cdp_url(endpoint_arg)

    # Background pinger; lives only for the duration of run().
    pinger = asyncio.create_task(_keep_alive(endpoint_arg))

    try:
        await run(websocket_url)
    finally:
        # Stop the pinger and wait for it to unwind; the resulting
        # cancellation is expected and therefore swallowed.
        pinger.cancel()
        try:
            await pinger
        except asyncio.CancelledError:
            pass
156+
157+
def main() -> None:
    """CLI entry point: expect the DevTools endpoint as the first argument.

    Prints a usage line to stderr and exits with status 1 when no argument
    is supplied; otherwise runs the async workflow to completion.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python main.py <DevTools HTTP endpoint>", file=sys.stderr)
        sys.exit(1)

    endpoint = cli_args[0]
    asyncio.run(_async_main(endpoint))


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

shared/cdp-test/pyproject.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[project]
2+
name = "cdp-test"
3+
version = "0.1.0"
4+
description = "CDP smoke test: attach to a running headless Chromium over CDP, navigate, and save screenshots"
5+
readme = "README.md"
6+
requires-python = ">=3.11"
7+
dependencies = [
8+
"aiohttp>=3.12.13",
9+
"playwright>=1.52.0",
10+
]

0 commit comments

Comments
 (0)