@@ -8,7 +8,10 @@
 import os
 import shlex
 import subprocess
+import textwrap
+from datetime import datetime, timezone
 from pathlib import Path
+from time import perf_counter
 from typing import Iterable
 
 
@@ -40,6 +43,26 @@
 ITERATIONS = int(os.environ.get("ITERATIONS", "1"))
 BRAINTRUST_UPLOADER = BraintrustUploader(os.environ)
 
+
+def _log(message: str) -> None:
+    """Emit a timestamped log line that pytest `-s` will surface immediately."""
+    timestamp = datetime.now(timezone.utc).isoformat(timespec="seconds")
+    print(f"[{timestamp}] {message}", flush=True)
+
+
+def _summarise_command(parts: Iterable[str]) -> str:
+    """Return a shell-style command string for debugging output."""
+    sequence = parts if isinstance(parts, list) else list(parts)
+    if hasattr(shlex, "join"):
+        return shlex.join(sequence)
+    # ``shlex.join`` was added in Python 3.8; keep a safe fallback just in case.
+    return " ".join(shlex.quote(part) for part in sequence)
+
+
+def _preview_output(output: str, *, limit: int = 400) -> str:
+    """Provide a trimmed preview of command output for quick debugging."""
+    return textwrap.shorten(output.strip(), width=limit, placeholder=" …")
+
+
 pytestmark = [
     pytest.mark.skipif(
         not RUN_LIVE,
@@ -90,14 +113,62 @@ def _build_command(prompt: str, model: str, resource_group: str, cluster_name: s
 
 
 def _run_cli(command: Iterable[str], env: dict[str, str]) -> str:
+    command_list = list(command)
+    command_display = _summarise_command(command_list)
+    _log(f"Invoking AKS Agent CLI: {command_display}")
+    start = perf_counter()
+
+    # Enforce a timeout and mirror stderr in real time so hung CLI calls are debuggable.
+    import sys
+    import threading
+    timeout_seconds = 600  # 10-minute timeout for a single CLI invocation
+
     try:
-        result = subprocess.run(  # noqa: S603
-            list(command),
-            check=True,
-            capture_output=True,
+        # Use Popen for real-time output visibility
+        process = subprocess.Popen(  # noqa: S603
+            command_list,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
             text=True,
             env=env,
         )
+
+        # Thread to print stderr in real-time
+        stderr_lines = []
+        def print_stderr():
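+            # readline() returns '' only at EOF, so iter(..., '') stops once the pipe closes.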
+            for line in iter(process.stderr.readline, ''):
+                if line:
+                    print(f"[STDERR] {line.rstrip()}", file=sys.stderr, flush=True)
+                    stderr_lines.append(line)
+
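+        # Daemon thread so a blocked readline cannot keep the test process alive.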
+        stderr_thread = threading.Thread(target=print_stderr, daemon=True)
+        stderr_thread.start()
+
+        # Wait with timeout
+        try:
+            stdout, _ = process.communicate(timeout=timeout_seconds)
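+            # Let the reader thread flush any buffered stderr before joining.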
+            stderr_thread.join(timeout=1)
+            stderr = ''.join(stderr_lines)
+        except subprocess.TimeoutExpired:
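+            # Kill the hung process, then collect whatever output it managed to produce.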
+            process.kill()
+            stdout, stderr_remainder = process.communicate()
+            stderr = ''.join(stderr_lines) + (stderr_remainder or '')
+            _log(f"[ERROR] CLI command timed out after {timeout_seconds}s")
+            pytest.fail(
+                f"AKS Agent CLI call timed out after {timeout_seconds}s\n"
+                f"Command: {command_display}\n"
+                f"Stdout: {stdout}\n"
+                f"Stderr: {stderr}"
+            )
+
+        if process.returncode != 0:
+            raise subprocess.CalledProcessError(
+                process.returncode, command_list, stdout, stderr
+            )
+
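+        # Mirror subprocess.run()'s result shape so the logging below stays uniform.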
+        result = subprocess.CompletedProcess(
+            command_list, process.returncode, stdout, stderr
+        )
     except subprocess.CalledProcessError as exc:  # pragma: no cover - live failure path
         output = exc.stdout or ""
         stderr = exc.stderr or ""
@@ -109,13 +180,28 @@ def _run_cli(command: Iterable[str], env: dict[str, str]) -> str:
             f"Stdout: {output}\n"
             f"Stderr: {stderr}"
         )
+    duration = perf_counter() - start
+    stdout_preview = _preview_output(result.stdout)
+    stderr_preview = _preview_output(result.stderr) if result.stderr else None
+    _log(
+        f"AKS Agent CLI completed in {duration:.1f}s with stdout preview: {stdout_preview}"
+    )
+    if stderr_preview:
+        _log(
+            f"AKS Agent CLI stderr preview: {stderr_preview}"
+        )
     return result.stdout
 
 
 def _run_commands(
     commands: list[str], env: dict[str, str], label: str, scenario: Scenario
 ) -> None:
+    if not commands:
+        _log(f"[{label}] {scenario.name}: no commands to run")
+        return
     for cmd in commands:
+        _log(f"[{label}] {scenario.name}: running shell command: {cmd}")
+        start = perf_counter()
         try:
             completed = subprocess.run(  # noqa: S603
                 cmd,
@@ -137,9 +223,25 @@ def _run_commands(
                 f"Stderr: {stderr}"
             )
         else:
+            duration = perf_counter() - start
             # Provide quick visibility into command results when debugging failures.
             if completed.stdout:
-                print(f"[{label}] {scenario.name}: {completed.stdout.strip()}")
+                stdout_preview = _preview_output(completed.stdout)
+                _log(
+                    f"[{label}] {scenario.name}: succeeded in {duration:.1f}s; stdout preview: {stdout_preview}"
+                )
+            else:
+                _log(
+                    f"[{label}] {scenario.name}: succeeded in {duration:.1f}s; no stdout produced"
+                )
+            if completed.stderr:
+                stderr_preview = _preview_output(completed.stderr)
+                _log(
+                    f"[{label}] {scenario.name}: stderr preview: {stderr_preview}"
+                )
+    _log(
+        f"[{label}] {scenario.name}: completed {len(commands)} command(s)"
+    )
 
 
 def _scenario_params() -> list:
@@ -165,6 +267,7 @@ def test_ask_agent_live(
     request: pytest.FixtureRequest,
 ) -> None:
     iteration_label = f"[iteration {iteration + 1}/{ITERATIONS}]"
+    _log(f"{iteration_label} starting scenario {scenario.name}")
     if RUN_LIVE:
         env = _load_env()
 
@@ -178,7 +281,7 @@ def test_ask_agent_live(
         env.update(scenario.env_overrides)
 
         if iteration == 0 and scenario.before_commands and not aks_skip_setup:
-            print(f"{iteration_label} running setup commands for {scenario.name}")
+            _log(f"{iteration_label} running setup commands for {scenario.name}")
             _run_commands(scenario.before_commands, env, "setup", scenario)
 
         command = _build_command(
@@ -188,7 +291,7 @@ def test_ask_agent_live(
             cluster_name=cluster_name,
         )
 
-        print(f"{iteration_label} invoking AKS Agent CLI for {scenario.name}")
+        _log(f"{iteration_label} invoking AKS Agent CLI for {scenario.name}")
         try:
             raw_output = _run_cli(command, env)
             answer = ""
@@ -216,11 +319,11 @@ def test_ask_agent_live(
                     classifier_rationale = classifier_result.metadata.get(
                         "rationale", ""
                     )
-                    print(
+                    _log(
                         f"{iteration_label} classifier score for {scenario.name}: {classifier_score}"
                     )
                     if classifier_score is None:
-                        print(
+                        _log(
                             f"{iteration_label} classifier returned no score for {scenario.name}; falling back to substring checks"
                         )
                     else:
@@ -230,7 +333,7 @@ def test_ask_agent_live(
                         if not error_message:
                             error_message = "Classifier judged answer incorrect"
                 else:
-                    print(
+                    _log(
                         f"{iteration_label} classifier unavailable for {scenario.name}; falling back to substring checks"
                     )
 
@@ -280,21 +383,21 @@ def test_ask_agent_live(
 
         if GENERATE_MOCKS:
             mock_path = save_mock_answer(scenario.mock_path, answer)
-            print(f"{iteration_label} [mock] wrote response to {mock_path}")
+            _log(f"{iteration_label} [mock] wrote response to {mock_path}")
         finally:
             if (
                 iteration == ITERATIONS - 1
                 and scenario.after_commands
                 and not aks_skip_cleanup
             ):
-                print(f"{iteration_label} running cleanup commands for {scenario.name}")
+                _log(f"{iteration_label} running cleanup commands for {scenario.name}")
                 _run_commands(scenario.after_commands, env, "cleanup", scenario)
     else:
         if GENERATE_MOCKS:
             pytest.fail("GENERATE_MOCKS requires RUN_LIVE=true")
         try:
             answer = load_mock_answer(scenario.mock_path)
-            print(f"{iteration_label} replayed mock response for {scenario.name}")
+            _log(f"{iteration_label} replayed mock response for {scenario.name}")
         except FileNotFoundError:
             pytest.skip(f"Mock response missing for scenario {scenario.name}; rerun with RUN_LIVE=true GENERATE_MOCKS=true")
 
@@ -328,5 +431,6 @@ def test_ask_agent_live(
     _set_user_property(request, 'braintrust_root_span_id', str(root_span_id))
     if url:
         _set_user_property(request, 'braintrust_experiment_url', str(url))
+    _log(f"{iteration_label} completed scenario {scenario.name} (passed={passed})")
     if not passed:
         pytest.fail(f"Scenario {scenario.name}: {error}\nAI answer:\n{answer}")