|
| 1 | +import logging |
| 2 | +import urllib.parse |
| 3 | +from datetime import datetime |
| 4 | +from typing import Tuple |
| 5 | + |
| 6 | +import click |
| 7 | +import requests |
| 8 | + |
# Module-level logger; used for debug output when the best-effort token check fails.
logger = logging.getLogger(__name__)

# Name prefix used to recognise CLI tokens when counting a user's existing tokens.
CLI_TOKEN_PREFIX = "cli token "

# Install instructions appended to the error raised when Playwright cannot be
# imported. The backslash after the opening quotes and the one on the last
# text line suppress the literal's leading and trailing newlines.
_INSTALL_HELP = """\
The --sso flag requires Playwright and a Chromium browser.

Step 1 — Install the Python package (pick your package manager):
 pip install 'acryl-datahub[sso]'
 uv pip install 'acryl-datahub[sso]'
 pip install 'playwright>=1.40.0'

Step 2 — Download the Chromium browser binary:
 playwright install chromium\
"""
| 24 | + |
| 25 | + |
def _check_playwright_ready() -> None:
    """Ensure the Playwright sync API can be imported.

    Only the Python package is probed; no browser is launched here. A missing
    Chromium binary is therefore not detected by this function and is left
    for Playwright to report when the browser is launched.

    Raises:
        click.UsageError: If the import fails. The message carries the
            step-by-step install instructions from ``_INSTALL_HELP``.
    """
    try:
        from playwright.sync_api import sync_playwright  # noqa: F401
    except ImportError as import_error:
        message = "Playwright is not installed.\n\n" + _INSTALL_HELP
        raise click.UsageError(message) from import_error
| 39 | + |
| 40 | + |
def _warn_about_existing_cli_tokens(
    session: requests.Session,
    frontend_url: str,
    actor_urn: str,
) -> None:
    """Best-effort warning about existing CLI tokens for the current user.

    Queries ``listAccessTokens`` (first 100 tokens filtered by ``ownerUrn``)
    through the frontend GraphQL endpoint and prints a notice when at least
    one returned token name starts with ``CLI_TOKEN_PREFIX``.

    Args:
        session: Session used to send the request; the caller is expected to
            have attached the login cookies to it.
        frontend_url: Base URL of the DataHub frontend.
        actor_urn: URN used as the ``ownerUrn`` filter value.

    Any failure (network error, HTTP error status, unexpected response shape)
    is logged at debug level and otherwise ignored, so this check never
    aborts the caller.
    """
    query = """query listAccessTokens($input: ListAccessTokenInput!) {
        listAccessTokens(input: $input) {
            total
            tokens { name }
        }
    }"""
    variables = {
        "input": {
            "start": 0,
            "count": 100,
            "filters": [
                {
                    "field": "ownerUrn",
                    "values": [actor_urn],
                }
            ],
        }
    }

    try:
        response = session.post(
            f"{frontend_url}/api/v2/graphql",
            json={"query": query, "variables": variables},
            # requests never times out by default; bound the wait so this
            # optional check cannot hang the login (same limit as the
            # createAccessToken request in this module).
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()

        # Each level may be an explicit JSON null, so fall back with `or`
        # rather than relying on dict.get defaults.
        listing = (payload.get("data") or {}).get("listAccessTokens") or {}
        tokens = listing.get("tokens") or []
        cli_token_count = sum(
            1
            for token in tokens
            if (token.get("name") or "").startswith(CLI_TOKEN_PREFIX)
        )
        if cli_token_count > 0:
            click.echo(
                f"⚠ You have {cli_token_count} existing CLI token(s). "
                f"Manage them at {frontend_url}/settings/tokens"
            )
    except Exception:
        # Deliberately broad: the warning is purely informational.
        logger.debug("Failed to check existing CLI tokens", exc_info=True)
| 84 | + |
| 85 | + |
def _collect_sso_cookies(login_url: str, timeout_ms: int) -> list:
    """Run the interactive browser login and return the browser context's cookies.

    Opens a visible Chromium window at ``login_url`` and waits until a cookie
    named ``actor`` is readable from the page's ``document.cookie``.

    Raises:
        click.ClickException: If the wait times out, or if Playwright reports
            another error while waiting (for example the window was closed).
    """
    from playwright.sync_api import Error as PlaywrightError
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context()
            page = context.new_page()
            page.goto(login_url)

            # The actor cookie becoming visible signals a successful SSO login.
            try:
                page.wait_for_function(
                    """() => document.cookie.split('; ').some(c => c.startsWith('actor='))""",
                    timeout=timeout_ms,
                )
            except PlaywrightTimeoutError as e:
                raise click.ClickException(
                    f"SSO login timed out after {timeout_ms // 1000} seconds. "
                    "Please try again."
                ) from e
            except PlaywrightError as e:
                # Not a timeout: report it as such instead of blaming the clock.
                raise click.ClickException(
                    "SSO login did not complete; the browser window may have "
                    f"been closed. Please try again. ({e})"
                ) from e

            return context.cookies()
        finally:
            browser.close()


def _create_cli_access_token(
    session: requests.Session,
    frontend_url: str,
    actor_urn: str,
    token_duration: str,
    token_name: str,
) -> str:
    """Create a PERSONAL access token via the frontend GraphQL API and return it.

    Raises:
        click.ClickException: If the response carries GraphQL errors or no
            access token.
        requests.HTTPError: Propagated from ``raise_for_status`` on an HTTP
            error status.
    """
    json_payload = {
        "query": """mutation createAccessToken($input: CreateAccessTokenInput!) {
            createAccessToken(input: $input) {
                accessToken
                metadata {
                    id
                    actorUrn
                    ownerUrn
                    name
                    description
                }
            }
        }""",
        "variables": {
            "input": {
                "type": "PERSONAL",
                "actorUrn": actor_urn,
                "duration": token_duration,
                "name": token_name,
            }
        },
    }

    response = session.post(
        f"{frontend_url}/api/v2/graphql", json=json_payload, timeout=30
    )
    response.raise_for_status()

    data = response.json()
    errors = data.get("errors")
    if errors:
        error_msg = errors[0].get("message", str(errors))
        raise click.ClickException(
            f"Failed to create access token: {error_msg}\n"
            "Check that personal access tokens are enabled and your account has permission."
        )

    # "data" and "createAccessToken" may be explicit JSON nulls; `or {}` keeps
    # that case on the friendly error path instead of an AttributeError.
    created = (data.get("data") or {}).get("createAccessToken") or {}
    access_token = created.get("accessToken")
    if not access_token:
        raise click.ClickException(
            "Server returned empty access token. Contact your DataHub administrator."
        )
    return access_token


def browser_sso_login(
    frontend_url: str,
    token_duration: str,
    timeout_ms: int = 120_000,
    support: bool = False,
) -> Tuple[str, str]:
    """Open browser for SSO login, extract session, generate access token.

    Args:
        frontend_url: The DataHub frontend URL (e.g. http://localhost:9002).
            A trailing slash is tolerated.
        token_duration: Token validity duration (e.g. ONE_HOUR).
        timeout_ms: How long to wait for SSO login to complete, in milliseconds.
        support: If True, use /support/authenticate path for DataHub Cloud
            support team access to customer instances.

    Returns:
        Tuple of (token_name, access_token).

    Raises:
        click.UsageError: If Playwright is not importable.
        click.ClickException: On timeout, an interrupted browser login, a
            missing actor cookie, or a failed/empty token creation response.
    """
    _check_playwright_ready()

    # Avoid "//authenticate" and "//api/..." when the URL ends with a slash.
    base_url = frontend_url.rstrip("/")
    auth_path = "/support/authenticate" if support else "/authenticate"
    if support:
        click.echo("Opening browser for support SSO login...")
    else:
        click.echo("Opening browser for SSO login...")
    click.echo("Complete the login in your browser.\n")

    cookies = _collect_sso_cookies(f"{base_url}{auth_path}", timeout_ms)

    # The actor cookie value is the URL-encoded URN of the logged-in user.
    actor_urn = next(
        (
            urllib.parse.unquote(cookie["value"])
            for cookie in cookies
            if cookie["name"] == "actor"
        ),
        None,
    )
    if not actor_urn:
        raise click.ClickException(
            "SSO login succeeded but no actor cookie found. "
            "This may indicate an incompatible DataHub version."
        )

    # The context manager closes the session's connections on every exit path.
    with requests.Session() as session:
        # Replay the browser cookies so the frontend treats us as logged in.
        for cookie in cookies:
            session.cookies.set(
                cookie["name"],
                cookie["value"],
                domain=cookie.get("domain", ""),
                path=cookie.get("path", "/"),
            )

        click.echo(f"✓ Logged in as {actor_urn}")

        _warn_about_existing_cli_tokens(session, base_url, actor_urn)

        # Use the shared prefix so the existing-token warning recognises
        # tokens created here; the suffix is the local time with UTC offset.
        timestamp = datetime.now().astimezone().isoformat()
        token_name = f"{CLI_TOKEN_PREFIX}{timestamp}"

        access_token = _create_cli_access_token(
            session, base_url, actor_urn, token_duration, token_name
        )

    return token_name, access_token
0 commit comments