Skip to content

Commit e3481fb

Browse files
shirshanka and claude
authored and committed
feat(cli): warn about existing CLI tokens during datahub init --sso
After SSO login, query existing access tokens for the user and print a count of CLI tokens with a link to manage them in the UI. This gives users visibility into token accumulation without auto-revoking tokens that may be in use elsewhere.

Also removes the broken _revoke_old_cli_tokens call that would crash at runtime and the unused List import.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2522c05 commit e3481fb

File tree

2 files changed

+108
-11
lines changed

2 files changed

+108
-11
lines changed

metadata-ingestion/src/datahub/cli/sso_cli.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1+
import logging
12
import urllib.parse
23
from datetime import datetime
34
from typing import Tuple
45

56
import click
67
import requests
78

9+
logger = logging.getLogger(__name__)
10+
11+
CLI_TOKEN_PREFIX = "cli token "
12+
813
_INSTALL_HELP = """\
914
The --sso flag requires Playwright and a Chromium browser.
1015
@@ -33,6 +38,51 @@ def _check_playwright_ready() -> None:
3338
) from e
3439

3540

41+
def _warn_about_existing_cli_tokens(
    session: requests.Session,
    frontend_url: str,
    actor_urn: str,
    timeout_seconds: float = 10.0,
) -> None:
    """Print a best-effort warning about CLI tokens the user already owns.

    Posts a ``listAccessTokens`` GraphQL query to ``{frontend_url}/api/v2/graphql``
    filtered on ``ownerUrn == actor_urn``, counts the returned tokens whose name
    starts with ``CLI_TOKEN_PREFIX``, and echoes that count together with a link
    to ``{frontend_url}/settings/tokens`` when at least one is found.

    This function never raises: any failure (network error, HTTP error status,
    unexpected payload) is logged at debug level and otherwise ignored, so the
    caller's flow is not interrupted.

    Args:
        session: Session used to send the GraphQL request.
        frontend_url: Base URL of the frontend, without a trailing slash.
        actor_urn: URN of the user whose tokens are listed.
        timeout_seconds: Upper bound, in seconds, passed as the request
            timeout so a stalled connection cannot block the caller forever.
    """
    try:
        response = session.post(
            f"{frontend_url}/api/v2/graphql",
            json={
                "query": """query listAccessTokens($input: ListAccessTokenInput!) {
  listAccessTokens(input: $input) {
    total
    tokens { name }
  }
}""",
                "variables": {
                    "input": {
                        "start": 0,
                        "count": 100,
                        "filters": [
                            {
                                "field": "ownerUrn",
                                "values": [actor_urn],
                            }
                        ],
                    }
                },
            },
            timeout=timeout_seconds,
        )
        response.raise_for_status()
        payload = response.json()

        # GraphQL may answer with ``"data": null`` (or a null sub-field) next to
        # an ``errors`` list; ``or`` falls back for both missing and null values.
        listing = (payload.get("data") or {}).get("listAccessTokens") or {}
        tokens = listing.get("tokens") or []

        # A token with a null/missing name must not hide the other tokens.
        cli_token_count = sum(
            1
            for token in tokens
            if (token.get("name") or "").startswith(CLI_TOKEN_PREFIX)
        )

        # Only the first page is fetched; when the server reports more tokens
        # than were returned, the count is a lower bound and is labelled as such.
        total = listing.get("total")
        truncated = isinstance(total, int) and total > len(tokens)

        if cli_token_count > 0:
            qualifier = "at least " if truncated else ""
            click.echo(
                f"⚠ You have {qualifier}{cli_token_count} existing CLI token(s). "
                f"Manage them at {frontend_url}/settings/tokens"
            )
    except Exception:
        # Deliberately broad: this is an informational check and must never
        # break the login flow. The traceback is kept for debugging.
        logger.debug("Failed to check existing CLI tokens", exc_info=True)
84+
85+
3686
def browser_sso_login(
3787
frontend_url: str,
3888
token_duration: str,
@@ -114,6 +164,8 @@ def browser_sso_login(
114164

115165
click.echo(f"✓ Logged in as {actor_urn}")
116166

167+
_warn_about_existing_cli_tokens(session, frontend_url, actor_urn)
168+
117169
# Generate an access token via the frontend GraphQL API
118170
now = datetime.now()
119171
timestamp = now.astimezone().isoformat()

metadata-ingestion/tests/unit/cli/test_sso_cli.py

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import pytest
55

6-
from datahub.cli.sso_cli import browser_sso_login
6+
from datahub.cli.sso_cli import _warn_about_existing_cli_tokens, browser_sso_login
77

88

99
@pytest.fixture
@@ -64,8 +64,14 @@ def test_extracts_cookies_and_generates_token(self, mock_playwright: dict) -> No
6464
with patch("datahub.cli.sso_cli.requests") as mock_requests:
6565
mock_session = MagicMock()
6666
mock_requests.Session.return_value = mock_session
67-
mock_response = MagicMock()
68-
mock_response.json.return_value = {
67+
68+
# First call: listAccessTokens (warning check), second: createAccessToken
69+
list_response = MagicMock()
70+
list_response.json.return_value = {
71+
"data": {"listAccessTokens": {"total": 0, "tokens": []}}
72+
}
73+
create_response = MagicMock()
74+
create_response.json.return_value = {
6975
"data": {
7076
"createAccessToken": {
7177
"accessToken": "generated-sso-token-xyz",
@@ -76,7 +82,7 @@ def test_extracts_cookies_and_generates_token(self, mock_playwright: dict) -> No
7682
}
7783
}
7884
}
79-
mock_session.post.return_value = mock_response
85+
mock_session.post.side_effect = [list_response, create_response]
8086

8187
token_name, access_token = browser_sso_login(
8288
"http://localhost:9002", "ONE_HOUR"
@@ -88,16 +94,16 @@ def test_extracts_cookies_and_generates_token(self, mock_playwright: dict) -> No
8894
# Verify cookies were set on the session
8995
assert mock_session.cookies.set.call_count == 2
9096

91-
# Verify GraphQL call was made
92-
mock_session.post.assert_called_once()
93-
call_args = mock_session.post.call_args
94-
assert call_args[0][0] == "http://localhost:9002/api/v2/graphql"
95-
assert "createAccessToken" in call_args[1]["json"]["query"]
97+
# Verify GraphQL calls were made (list + create)
98+
assert mock_session.post.call_count == 2
99+
create_call = mock_session.post.call_args_list[1]
100+
assert create_call[0][0] == "http://localhost:9002/api/v2/graphql"
101+
assert "createAccessToken" in create_call[1]["json"]["query"]
96102
assert (
97-
call_args[1]["json"]["variables"]["input"]["actorUrn"]
103+
create_call[1]["json"]["variables"]["input"]["actorUrn"]
98104
== "urn:li:corpuser:john.doe"
99105
)
100-
assert call_args[1]["json"]["variables"]["input"]["duration"] == "ONE_HOUR"
106+
assert create_call[1]["json"]["variables"]["input"]["duration"] == "ONE_HOUR"
101107

102108
def test_timeout_raises_error(self, mock_playwright: dict) -> None:
103109
"""Verify timeout if login never completes."""
@@ -150,3 +156,42 @@ def test_graphql_error_raises(self, mock_playwright: dict) -> None:
150156

151157
with pytest.raises(Exception, match="Failed to generate access token"):
152158
browser_sso_login("http://localhost:9002", "ONE_HOUR")
159+
160+
161+
class TestWarnAboutExistingCliTokens:
    """Unit tests for ``_warn_about_existing_cli_tokens``."""

    def test_warns_about_existing_cli_tokens(
        self, capsys: pytest.CaptureFixture
    ) -> None:
        """Only tokens named with the CLI prefix are counted in the warning."""
        session = MagicMock()
        response = MagicMock()
        response.json.return_value = {
            "data": {
                "listAccessTokens": {
                    "total": 4,
                    "tokens": [
                        {"name": "cli token 2026-03-01T10:00:00"},
                        {"name": "cli token 2026-03-02T10:00:00"},
                        {"name": "cli token 2026-03-03T10:00:00"},
                        {"name": "manually created token"},
                    ],
                }
            }
        }
        session.post.return_value = response

        _warn_about_existing_cli_tokens(
            session, "https://example.com", "urn:li:corpuser:alice"
        )

        captured = capsys.readouterr()
        assert "3 existing CLI token(s)" in captured.out
        assert "https://example.com/settings/tokens" in captured.out

    def test_no_warning_without_cli_tokens(
        self, capsys: pytest.CaptureFixture
    ) -> None:
        """Nothing is printed when none of the user's tokens are CLI tokens."""
        session = MagicMock()
        response = MagicMock()
        response.json.return_value = {
            "data": {
                "listAccessTokens": {
                    "total": 1,
                    "tokens": [{"name": "manually created token"}],
                }
            }
        }
        session.post.return_value = response

        _warn_about_existing_cli_tokens(
            session, "https://example.com", "urn:li:corpuser:alice"
        )

        assert capsys.readouterr().out == ""

    def test_warning_failure_does_not_block(
        self, capsys: pytest.CaptureFixture
    ) -> None:
        """A failing request is swallowed and produces no user-visible output."""
        session = MagicMock()
        session.post.side_effect = Exception("network error")

        # Should not raise — failure is silently logged
        _warn_about_existing_cli_tokens(
            session, "https://example.com", "urn:li:corpuser:alice"
        )

        session.post.assert_called_once()
        assert capsys.readouterr().out == ""

0 commit comments

Comments
 (0)