|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Export a static snapshot of a Glance dashboard for GitHub Pages. |
| 4 | +
|
| 5 | +Glance pages are an SPA shell; real page content is fetched from: |
| 6 | + /api/pages/<slug>/content/ |
| 7 | +
|
| 8 | +This script: |
| 9 | + - fetches the shell HTML for each page |
| 10 | + - fetches rendered content HTML for each page |
| 11 | + - injects content into the shell |
| 12 | + - marks the page as "content-ready" so the loader is hidden (CSS-driven) |
| 13 | + - downloads Glance static assets needed for rendering (bundle.css + referenced fonts/images) |
| 14 | + - copies repo assets/ (images/fonts/json/css) |
| 15 | + - rewrites root-absolute links (/assets, /static, /overview, ...) to include a base path |
| 16 | + suitable for GitHub project pages (e.g. "/<repo-name>") |
| 17 | +""" |
| 18 | + |
| 19 | +from __future__ import annotations |
| 20 | + |
| 21 | +import argparse |
| 22 | +import os |
| 23 | +import posixpath |
| 24 | +import re |
| 25 | +import shutil |
| 26 | +import sys |
| 27 | +import time |
| 28 | +import urllib.parse |
| 29 | +import urllib.request |
| 30 | +from pathlib import Path |
| 31 | + |
| 32 | + |
def _fetch_bytes(url: str, timeout_s: float = 20.0) -> bytes:
    """Download *url* and return the raw response body.

    The request carries the exporter's own User-Agent header, and
    ``timeout_s`` is handed to ``urlopen`` as its timeout.
    """
    request_headers = {"User-Agent": "iqss-glance-static-export/1.0"}
    request = urllib.request.Request(url, headers=request_headers)
    response = urllib.request.urlopen(request, timeout=timeout_s)
    # The response object is a context manager; leaving the block closes it.
    with response:
        body = response.read()
    return body
| 37 | + |
| 38 | + |
def _fetch_text(url: str, timeout_s: float = 20.0) -> str:
    """Download *url* and return its body decoded as UTF-8.

    Bytes that are not valid UTF-8 are replaced rather than raising.
    """
    raw = _fetch_bytes(url, timeout_s=timeout_s)
    text = raw.decode(encoding="utf-8", errors="replace")
    return text
| 41 | + |
| 42 | + |
def _mkdirp(p: Path) -> None:
    """Create directory *p* and any missing parents; do nothing if it already exists."""
    os.makedirs(p, exist_ok=True)
| 45 | + |
| 46 | + |
def _write_bytes(path: Path, data: bytes) -> None:
    """Write *data* to *path*, creating the parent directory tree first."""
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("wb") as fh:
        fh.write(data)
| 50 | + |
| 51 | + |
def _write_text(path: Path, data: str) -> None:
    """Write *data* to *path* as UTF-8 text, creating the parent directory tree first."""
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as fh:
        fh.write(data)
| 55 | + |
| 56 | + |
def _normalize_base_path(base_path: str) -> str:
    """Canonicalise a site base path.

    Returns "" when the input is blank or just "/" (meaning: no rewrite,
    e.g. for a local preview served at /). Otherwise returns the path with
    a single guaranteed leading "/" and no trailing "/", e.g. "/repo" for a
    GitHub Pages project site.
    """
    trimmed = base_path.strip()
    if trimmed == "" or trimmed == "/":
        return ""
    prefixed = trimmed if trimmed.startswith("/") else "/" + trimmed
    return prefixed.rstrip("/")
| 68 | + |
| 69 | + |
def _discover_slugs(config_dir: Path) -> list[str]:
    """Collect page slugs declared in ``config_dir/*.yml``.

    The files are scanned line by line with a regex instead of a YAML
    parser (keeps the script dependency-free). A line counts as a slug
    declaration when it looks like any of::

        slug: home
        - slug: home
        slug: "home"        # optional trailing comment
        slug: 'home'

    Slugs are returned in first-seen order (files visited in sorted order),
    de-duplicated, with "home" moved to the front when present so the
    output is predictable.
    """
    # Group 1: optional opening quote (must be closed by the same quote).
    # Group 2: the slug itself.
    # A trailing comment must be preceded by whitespace, as in YAML.
    slug_re = re.compile(
        r"""^\s*(?:-\s+)?slug:\s*(['"]?)([A-Za-z0-9_-]+)\1(?:\s+#.*)?\s*$"""
    )

    # dict preserves insertion order, giving ordered de-duplication.
    found: dict[str, None] = {}
    for yml in sorted(config_dir.glob("*.yml")):
        try:
            text = yml.read_text(encoding="utf-8", errors="replace")
        except FileNotFoundError:
            # File vanished between glob() and read; ignore it.
            continue
        for line in text.splitlines():
            m = slug_re.match(line)
            if m:
                found.setdefault(m.group(2), None)

    slugs = list(found)
    # Prefer Home first for predictable output.
    if "home" in found:
        slugs.remove("home")
        slugs.insert(0, "home")
    return slugs
| 94 | + |
| 95 | + |
def _extract_bundle_css_path(shell_html: str) -> str:
    """Return the root-absolute href of Glance's bundle.css found in *shell_html*.

    Matches a tag such as ``<link rel="stylesheet" href='/static/<hash>/css/bundle.css'>``.

    Raises:
        RuntimeError: if no such ``<link>`` tag is present.
    """
    link_pattern = re.compile(
        r"<link[^>]+href=['\"](/static/[^'\"]+/css/bundle\.css)['\"][^>]*>",
        re.IGNORECASE,
    )
    found = link_pattern.search(shell_html)
    if found is None:
        raise RuntimeError("Could not find bundle.css path in page HTML")
    return found.group(1)
| 106 | + |
| 107 | + |
def _extract_page_js_path(shell_html: str) -> str | None:
    """Return the root-absolute ``src`` of Glance's page.js script tag, or None if absent."""
    script_pattern = re.compile(
        r"<script[^>]+src=['\"](/static/[^'\"]+/js/page\.js)['\"][^>]*></script>",
        re.IGNORECASE,
    )
    hit = script_pattern.search(shell_html)
    if hit is None:
        return None
    return hit.group(1)
| 115 | + |
| 116 | + |
def _inject_content(shell_html: str, content_html: str) -> str:
    """Turn a Glance SPA shell into a fully rendered static page.

    Steps:
      1. Insert *content_html* into the empty ``id="page-content"`` div.
      2. Add the ``content-ready`` class to ``<main class="page ...">`` and
         flip the first ``aria-busy="true"`` to ``"false"`` so Glance's CSS
         shows the content and hides the loader.
      3. Drop the ``page.js`` script tag so the page does not try to
         re-fetch /api at runtime.

    Raises:
        RuntimeError: if the page-content placeholder div is not found.
    """
    # 1) Inject content into the placeholder. The shell contains:
    #      <div class="page-content" id="page-content"></div>
    # The replacement is a callable so that *content_html* is inserted
    # verbatim. A string template would interpret backslashes in the
    # content as regex escapes (`\n`, `\1`, or an error on e.g. `\d`).
    injected, n = re.subn(
        r'(<div[^>]*\bid=["\']page-content["\'][^>]*>)\s*</div>',
        lambda m: m.group(1) + content_html + "</div>",
        shell_html,
        count=1,
        flags=re.IGNORECASE | re.DOTALL,
    )
    if n != 1:
        raise RuntimeError("Failed to inject page content (page-content div not found)")

    # 2) Mark as content-ready so Glance CSS shows content and hides loader.
    #      <main class="page" ... aria-busy="true">
    #   -> <main class="page content-ready" ... aria-busy="false">
    injected = re.sub(
        r'(<main[^>]*\bclass=["\'])page(\b[^"\']*["\'][^>]*>)',
        r"\1page content-ready\2",
        injected,
        count=1,
        flags=re.IGNORECASE,
    )
    injected = re.sub(
        r'(\baria-busy=["\'])true(["\'])',
        r"\1false\2",
        injected,
        count=1,
        flags=re.IGNORECASE,
    )

    # 3) Remove the SPA JS boot file so it doesn't try to re-fetch /api at runtime.
    injected = re.sub(
        r"<script[^>]+src=['\"]/static/[^'\"]+/js/page\.js['\"][^>]*></script>\s*",
        "",
        injected,
        count=1,
        flags=re.IGNORECASE,
    )
    return injected
| 158 | + |
| 159 | + |
def _rewrite_base_paths(text: str, base_path: str) -> str:
    """Prefix root-absolute URLs in HTML/CSS *text* with *base_path*.

    Examples (base_path="/repo"):
        href="/assets/x"   -> href="/repo/assets/x"
        url('/static/a')   -> url('/repo/static/a')
        url(/static/a)     -> url(/repo/static/a)
        href="manifest.json" -> href="/repo/manifest.json"

    Protocol-relative URLs such as ``href="//example.com"`` are left alone.
    An empty *base_path* means "no rewrite" and returns *text* unchanged.
    The function is not idempotent: applying it twice prefixes twice.
    """
    if not base_path:
        return text

    # Callable replacements insert base_path literally; a string template
    # would interpret any backslash in it as a regex escape.
    def _prefix(m: re.Match) -> str:
        return m.group(1) + base_path + "/"

    # Common HTML attributes with root-absolute URLs (either quote style).
    text = re.sub(r"""((?:href|src|action)=["'])/(?!/)""", _prefix, text)

    # CSS url() root-absolute URLs: quoted or unquoted, optional whitespace
    # after the opening parenthesis.
    text = re.sub(r"""(url\(\s*["']?)/(?!/)""", _prefix, text)

    # Glance uses a relative manifest href (manifest.json) which breaks on
    # /<slug>/ pages. Make it base-absolute.
    text = re.sub(
        r"""href=(['"])manifest\.json""",
        lambda m: "href=" + m.group(1) + base_path + "/manifest.json",
        text,
        flags=re.IGNORECASE,
    )
    return text
| 195 | + |
| 196 | + |
def _css_dependency_paths(css_text: str, css_dir: str) -> list[str]:
    """Return the sorted, de-duplicated root-absolute paths referenced via url(...) in *css_text*.

    *css_dir* is the root-absolute directory of the stylesheet; relative
    references are resolved against it. Skipped references:
      - ``data:`` URIs and ``http(s)://`` URLs (not hosted by Glance),
      - protocol-relative ``//host/...`` URLs,
      - pure fragments such as ``url(#gradient)``.
    Any ``?query`` / ``#fragment`` suffix is dropped so the result is a
    plain file path. ``@import`` is intentionally ignored (not expected here).
    """
    # Handles url(foo), url('foo'), url("foo").
    url_re = re.compile(r"url\(\s*(['\"]?)([^'\"\)]+)\1\s*\)")
    paths: set[str] = set()

    for m in url_re.finditer(css_text):
        ref = m.group(2).strip()
        if ref.startswith(("data:", "http://", "https://", "//")):
            continue
        # Strip query string / fragment; a pure "#id" reference becomes empty.
        ref = re.split(r"[?#]", ref, maxsplit=1)[0]
        if not ref:
            continue
        # join() keeps an absolute ref as-is and resolves a relative one
        # against css_dir; normpath() collapses ".." and cannot climb above "/".
        paths.add(posixpath.normpath(posixpath.join(css_dir, ref)))

    return sorted(paths)


def _download_static_css_and_deps(glance_url: str, out_dir: Path, bundle_css_path: str) -> None:
    """Mirror Glance's bundle.css and every local file it references into *out_dir*.

    The stylesheet is saved under *out_dir* at the same relative location as
    *bundle_css_path*; each dependency is saved at its own root-absolute
    path relative to *out_dir*.

    Raises:
        RuntimeError: if a referenced dependency cannot be downloaded.
    """
    base_url = glance_url.rstrip("/") + "/"
    css_rel = bundle_css_path.lstrip("/")

    css_bytes = _fetch_bytes(urllib.parse.urljoin(base_url, css_rel))
    _write_bytes(out_dir / css_rel, css_bytes)

    css_text = css_bytes.decode("utf-8", errors="replace")
    # URL paths always use "/", so use posixpath rather than the OS-specific Path.
    css_dir = posixpath.dirname("/" + css_rel)

    for ref in _css_dependency_paths(css_text, css_dir):
        ref_url = urllib.parse.urljoin(base_url, ref.lstrip("/"))
        try:
            data = _fetch_bytes(ref_url)
        except Exception as e:
            raise RuntimeError(f"Failed to download static dependency {ref} from {ref_url}: {e}") from e
        _write_bytes(out_dir / ref.lstrip("/"), data)
| 235 | + |
| 236 | + |
def main() -> int:
    """Export every configured Glance page into a static site directory.

    Options (each falls back to an environment variable, then a default):
      --glance-url (GLANCE_URL), --out-dir (OUT_DIR), --base-path (BASE_PATH),
      --config-dir (CONFIG_DIR), --timeout-seconds.

    Returns the process exit code:
      0  success
      2  no page slugs found in the config directory
      3  Glance did not answer within --timeout-seconds
      4  assets/ directory missing from the current working directory
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--glance-url", default=os.environ.get("GLANCE_URL", "http://127.0.0.1:8080"))
    parser.add_argument("--out-dir", default=os.environ.get("OUT_DIR", "dist"))
    parser.add_argument("--base-path", default=os.environ.get("BASE_PATH", ""))
    parser.add_argument("--config-dir", default=os.environ.get("CONFIG_DIR", "config"))
    parser.add_argument("--timeout-seconds", type=int, default=60)
    args = parser.parse_args()

    glance_url = args.glance_url.rstrip("/")
    out_dir = Path(args.out_dir)
    base_path = _normalize_base_path(args.base_path)
    config_dir = Path(args.config_dir)

    slugs = _discover_slugs(config_dir)
    if not slugs:
        print(f"ERROR: No page slugs found under {config_dir}/", file=sys.stderr)
        return 2

    # Wait for Glance to be up (use /home if present, else /).
    # A monotonic clock keeps the timeout immune to wall-clock adjustments.
    deadline = time.monotonic() + args.timeout_seconds
    probe_path = "/home" if "home" in slugs else "/"
    while True:
        try:
            _fetch_bytes(glance_url + probe_path, timeout_s=5.0)
            break
        except Exception:
            if time.monotonic() > deadline:
                print(f"ERROR: Glance did not become ready at {glance_url} within timeout", file=sys.stderr)
                return 3
            time.sleep(0.5)

    # Validate inputs BEFORE wiping the previous export, so a missing
    # assets/ directory does not destroy an existing good build.
    repo_assets = Path("assets")
    if not repo_assets.is_dir():
        print("ERROR: assets/ directory not found in repo root", file=sys.stderr)
        return 4

    if out_dir.exists():
        shutil.rmtree(out_dir)
    _mkdirp(out_dir)

    # Copy repo assets/ as-is (data, fonts, images, custom CSS).
    shutil.copytree(repo_assets, out_dir / "assets")

    # Fetch manifest.json (used by Glance shell). Not fatal for static
    # rendering, but report the failure instead of hiding it.
    try:
        manifest = _fetch_bytes(glance_url + "/manifest.json")
        _write_bytes(out_dir / "manifest.json", manifest)
    except Exception as exc:
        print(f"WARNING: could not export manifest.json: {exc}", file=sys.stderr)

    # Use one shell page to find Glance's static bundle CSS path.
    sample_shell = _fetch_text(glance_url + ("/home" if "home" in slugs else f"/{slugs[0]}"))
    bundle_css_path = _extract_bundle_css_path(sample_shell)
    _download_static_css_and_deps(glance_url, out_dir, bundle_css_path)

    # Build each page.
    for slug in slugs:
        shell_html = _fetch_text(glance_url + f"/{slug}")
        content_html = _fetch_text(glance_url + f"/api/pages/{slug}/content/")
        page_html = _inject_content(shell_html, content_html)
        _write_text(out_dir / slug / "index.html", page_html)

        # The home page doubles as the site root.
        if slug == "home":
            _write_text(out_dir / "index.html", page_html)

    # Rewrite base paths in exported HTML/CSS (notably assets/user.css contains /assets/... URLs).
    # With an empty base path the rewrite is the identity, so skip the scan entirely.
    if base_path:
        for p in out_dir.rglob("*"):
            if not p.is_file():
                continue
            if p.suffix.lower() not in (".html", ".css"):
                continue
            try:
                original = p.read_text(encoding="utf-8", errors="replace")
            except Exception:
                # Best-effort: an unreadable file is left as exported.
                continue
            rewritten = _rewrite_base_paths(original, base_path)
            if rewritten != original:
                p.write_text(rewritten, encoding="utf-8")

    # GitHub Pages: ensure Jekyll is disabled.
    _write_text(out_dir / ".nojekyll", "")

    print(f"Exported {len(slugs)} pages to {out_dir}/ (base path: {base_path or '(none)'})")
    return 0
| 322 | + |
| 323 | + |
# Script entry point: run the exporter and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
0 commit comments