Skip to content

Commit a28d01c

Browse files
authored
Enhance diagnostics and error reporting in authentication workflow, including process exit code, stderr tail, and duration tracking. Add retry logic with detailed diagnostics for failed attempts. (#121)
1 parent 3583f76 commit a28d01c

File tree

2 files changed

+83
-4
lines changed

2 files changed

+83
-4
lines changed

.github/workflows/client.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import os
1010
import subprocess
1111
import tempfile
12+
import time
1213
from dataclasses import dataclass, field
1314
from pathlib import Path
1415

@@ -30,6 +31,9 @@ class AuthCheckResult:
3031
success: bool
3132
auth_methods: list[AuthMethod] = field(default_factory=list)
3233
error: str | None = None
34+
stderr_tail: str | None = None
35+
duration_seconds: float | None = None
36+
process_exit_code: int | None = None
3337

3438

3539
def parse_auth_methods(auth_methods_raw: list[dict]) -> list[AuthMethod]:
@@ -122,6 +126,29 @@ def read_jsonrpc(proc: subprocess.Popen, timeout: float) -> dict | None:
122126
) from e
123127

124128

129+
def _collect_proc_diagnostics(proc: subprocess.Popen) -> tuple[str | None, int | None]:
130+
"""Collect stderr tail and exit code from a process (non-blocking).
131+
132+
Returns:
133+
(stderr_tail, exit_code) — either may be None if unavailable.
134+
"""
135+
import select
136+
137+
exit_code = proc.poll()
138+
139+
stderr_tail: str | None = None
140+
try:
141+
ready, _, _ = select.select([proc.stderr], [], [], 0.5)
142+
if ready:
143+
data = proc.stderr.read(8192)
144+
if data:
145+
stderr_tail = data[-4000:]
146+
except Exception:
147+
pass
148+
149+
return stderr_tail, exit_code
150+
151+
125152
def run_auth_check(
126153
cmd: list[str],
127154
cwd: Path,
@@ -151,6 +178,7 @@ def run_auth_check(
151178
full_env["HOME"] = sandbox_home
152179

153180
proc = None
181+
t0 = time.monotonic()
154182
try:
155183
# Make binary executable if needed
156184
exe_path = Path(cmd[0])
@@ -194,15 +222,25 @@ def run_auth_check(
194222
response = read_jsonrpc(proc, timeout)
195223

196224
if response is None:
225+
duration = time.monotonic() - t0
226+
stderr_tail, exit_code = _collect_proc_diagnostics(proc)
197227
return AuthCheckResult(
198228
success=False,
199229
error=f"Timeout after {timeout}s waiting for initialize response",
230+
stderr_tail=stderr_tail,
231+
duration_seconds=duration,
232+
process_exit_code=exit_code,
200233
)
201234

202235
if "error" in response:
236+
duration = time.monotonic() - t0
237+
stderr_tail, exit_code = _collect_proc_diagnostics(proc)
203238
return AuthCheckResult(
204239
success=False,
205240
error=f"Agent error: {response['error']}",
241+
stderr_tail=stderr_tail,
242+
duration_seconds=duration,
243+
process_exit_code=exit_code,
206244
)
207245

208246
result = response.get("result", {})
@@ -214,16 +252,32 @@ def run_auth_check(
214252
# Validate
215253
is_valid, message = validate_auth_methods(auth_methods)
216254

255+
if is_valid:
256+
return AuthCheckResult(
257+
success=True,
258+
auth_methods=auth_methods,
259+
)
260+
261+
duration = time.monotonic() - t0
262+
stderr_tail, exit_code = _collect_proc_diagnostics(proc)
217263
return AuthCheckResult(
218-
success=is_valid,
264+
success=False,
219265
auth_methods=auth_methods,
220-
error=None if is_valid else message,
266+
error=message,
267+
stderr_tail=stderr_tail,
268+
duration_seconds=duration,
269+
process_exit_code=exit_code,
221270
)
222271

223272
except Exception as e:
273+
duration = time.monotonic() - t0
274+
stderr_tail, exit_code = _collect_proc_diagnostics(proc) if proc else (None, None)
224275
return AuthCheckResult(
225276
success=False,
226277
error=f"Error during auth check: {type(e).__name__}: {e}",
278+
stderr_tail=stderr_tail,
279+
duration_seconds=duration,
280+
process_exit_code=exit_code,
227281
)
228282
finally:
229283
if proc:

.github/workflows/verify_agents.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,19 @@ def build_agent_command(
434434
return cmd, cwd, env
435435

436436

437+
def _print_auth_diagnostics(result) -> None:
    """Print diagnostic details from a failed AuthCheckResult.

    Emits, in order and only when present: the check duration, the process
    exit code, and up to the last 20 lines of captured stderr (each prefixed
    with ``stderr:``).

    Args:
        result: Object exposing ``duration_seconds``, ``process_exit_code``
            and ``stderr_tail`` attributes; each may be ``None``.
    """
    max_stderr_lines = 20

    if result.duration_seconds is not None:
        print(f" Duration: {result.duration_seconds:.1f}s")
    if result.process_exit_code is not None:
        print(f" Process exit code: {result.process_exit_code}")
    if result.stderr_tail:
        # splitlines() handles "\n", "\r\n" and bare "\r" alike, so CRLF
        # output does not leave stray carriage returns in the log, and a
        # whitespace-only tail yields no lines instead of one empty line.
        lines = result.stderr_tail.rstrip().splitlines()
        for line in lines[-max_stderr_lines:]:
            print(f" stderr: {line}")
448+
449+
437450
def verify_auth(
438451
agent: dict,
439452
dist_type: str,
@@ -489,8 +502,20 @@ def verify_auth(
489502
if result.success:
490503
methods_info = ", ".join(f"{m.id}({m.type})" for m in result.auth_methods if m.type)
491504
return Result(agent_id, dist_type, True, f"Auth OK: {methods_info}")
492-
else:
493-
return Result(agent_id, dist_type, False, result.error or "Auth check failed")
505+
506+
# Print diagnostics for failed attempt
507+
_print_auth_diagnostics(result)
508+
509+
# Retry once for transient failures
510+
print(" Retrying...")
511+
result = run_auth_check(cmd, cwd, env, auth_timeout)
512+
513+
if result.success:
514+
methods_info = ", ".join(f"{m.id}({m.type})" for m in result.auth_methods if m.type)
515+
return Result(agent_id, dist_type, True, f"Auth OK (retry): {methods_info}")
516+
517+
_print_auth_diagnostics(result)
518+
return Result(agent_id, dist_type, False, result.error or "Auth check failed")
494519

495520

496521
def verify_agent(

0 commit comments

Comments
 (0)