|
| 1 | +import kernel |
| 2 | +from kernel import Kernel |
| 3 | +from playwright.async_api import async_playwright |
| 4 | +from typing import TypedDict |
| 5 | +from urllib.parse import urlparse |
| 6 | +from datetime import datetime |
| 7 | + |
# Shared Kernel API client; the action below uses it to create browsers.
client = Kernel()

# Kernel app under which this module's actions are registered.
app = kernel.App("python-persistent-browser")
| 12 | + |
class PageTitleInput(TypedDict):
    """Input payload accepted by the ``get-page-title`` action."""

    # Address of the page whose title should be read.
    url: str
| 15 | + |
class PageTitleOutput(TypedDict):
    """Result payload returned by the ``get-page-title`` action."""

    # Title of the page as reported by the browser.
    title: str
    # Time spent on the browser work of the action, in milliseconds.
    elapsed_ms: float
| 19 | + |
def _normalize_url(url: str) -> str:
    """Return *url* with a default ``https://`` scheme, rejecting host-less URLs.

    Args:
        url: Non-empty URL string, with or without an http(s) scheme.

    Returns:
        The URL, prefixed with ``https://`` when it had no http(s) scheme.

    Raises:
        ValueError: If the URL cannot be parsed or contains no host.
    """
    # Add https:// if no protocol is present
    if not url.startswith(("http://", "https://")):
        url = f"https://{url}"

    # urlparse() accepts almost any string, so a successful parse alone does
    # not validate anything; additionally require a host component.
    try:
        parsed = urlparse(url)
    except ValueError as err:
        raise ValueError(f"Invalid URL: {url}") from err
    if not parsed.hostname:
        raise ValueError(f"Invalid URL: {url}")
    return url


@app.action("get-page-title")
async def get_page_title(ctx: kernel.KernelContext, input_data: PageTitleInput) -> PageTitleOutput:
    """
    Extract the title of a webpage using a persistent Kernel browser.

    Args:
        ctx: Kernel context containing invocation information
        input_data: An object with a URL property

    Returns:
        A dictionary containing the page title (``title``) and the time spent
        between connecting to the browser and reading the title, in
        milliseconds (``elapsed_ms``).

    Raises:
        ValueError: If the URL is missing, not a string, or invalid.
    """
    url = input_data.get("url")
    if not url or not isinstance(url, str):
        raise ValueError("URL is required and must be a string")
    url = _normalize_url(url)

    # Create a browser instance using the context's invocation_id and a persistent id
    kernel_browser = client.browsers.create(
        invocation_id=ctx.invocation_id,
        persistence={"id": "my-awesome-persistent-browser-2"},
    )
    print("Kernel browser live view url: ", kernel_browser.browser_live_view_url)

    async with async_playwright() as playwright:
        browser = await playwright.chromium.connect_over_cdp(kernel_browser.cdp_ws_url)
        try:
            started_at = datetime.now()

            # Reuse the first existing context and page when the browser
            # already has them; otherwise create fresh ones.
            contexts = browser.contexts
            context = contexts[0] if contexts else await browser.new_context()
            pages = context.pages
            page = pages[0] if pages else await context.new_page()

            current_url = page.url
            print("Current url: ", current_url)
            if current_url != url:
                print("Not at url, navigating to it")
                await page.goto(url)
            else:
                print("Already at url, skipping navigation")

            # Going straight to page.title() has been observed to fail with
            # "Page.title: Execution context was destroyed, most likely
            # because of a navigation"; calling bring_to_front() first seems
            # to avoid it (root cause not established).
            await page.bring_to_front()
            title = await page.title()

            elapsed_ms = (datetime.now() - started_at).total_seconds() * 1000
            return {"title": title, "elapsed_ms": elapsed_ms}
        finally:
            # Always close the browser handle, even when navigation fails.
            await browser.close()
0 commit comments