
Commit c3d60c8

feat: add debug output with token/cost metrics
- Added llm_end callback to openai_client.py and llm.py with:
  - tokens_in, tokens_out from usage
  - cost from litellm.completion_cost()
  - latency_ms timing
- Updated trace.py and status.py to display metrics
- Added show_metrics param to enable debug mode metrics display
- Agent.py passes metrics flag to enable_status_output

Debug output now shows:

[17:18:22] Calling LLM (gpt-4o-mini)...
[17:18:24] │ 📊 gpt-4o-mini: 104→14 tokens [1.5s]
[17:18:24] ▸ get_weather(city='Tokyo')
[17:18:24] Calling LLM (gpt-4o-mini)...
[17:18:25] │ 📊 gpt-4o-mini: 133→8 tokens [0.7s]

Response: The weather in Tokyo is sunny.
1 parent 8c509b4 commit c3d60c8

6 files changed, +168 −17 lines


src/praisonai-agents/basic-agents-tools.py

Lines changed: 2 additions & 2 deletions
@@ -5,9 +5,9 @@ def get_weather(city: str) -> str:
 
 agent = Agent(
     instructions="You are a helpful assistant",
-    llm="openai/gpt-4o-mini",
+    llm="gpt-4o-mini",
     tools=[get_weather],
-    output="status"
+    output="debug"
 )
 
 agent.start("What is the weather in Tokyo?")
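For reference, a minimal sketch of the full example script after this change; the import line and the body of get_weather are assumptions (only the function signature and the Agent(...) call appear in the hunk above):

from praisonaiagents import Agent  # assumed import path

def get_weather(city: str) -> str:
    # Hypothetical tool body; the real example returns a canned weather string.
    return f"The weather in {city} is sunny."

agent = Agent(
    instructions="You are a helpful assistant",
    llm="gpt-4o-mini",
    tools=[get_weather],
    output="debug"  # new preset: status output plus token/cost metrics
)

agent.start("What is the weather in Tokyo?")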

src/praisonai-agents/praisonaiagents/agent/agent.py

Lines changed: 3 additions & 1 deletion
@@ -500,11 +500,13 @@ def __init__(
                 if not is_status_output_enabled():
                     output_format = "jsonl" if json_output else "text"
                     # simple_output=True means status preset (no timestamps)
+                    # metrics=True means debug preset (show token/cost info)
                     enable_status_output(
                         redact=True,
                         use_color=True,
                         format=output_format,
-                        show_timestamps=not simple_output
+                        show_timestamps=not simple_output,
+                        show_metrics=metrics
                     )
             except ImportError:
                 pass  # Status module not available

src/praisonai-agents/praisonaiagents/llm/llm.py

Lines changed: 33 additions & 0 deletions
@@ -1574,6 +1574,39 @@ def get_response(
             if self.metrics:
                 self._track_token_usage(final_response, self.model)
 
+            # Trigger llm_end callback with metrics for debug output
+            llm_latency_ms = (time.time() - current_time) * 1000
+
+            # Extract usage - handle both dict and ModelResponse object
+            tokens_in = 0
+            tokens_out = 0
+            if isinstance(final_response, dict):
+                usage = final_response.get("usage", {})
+                tokens_in = usage.get("prompt_tokens", 0)
+                tokens_out = usage.get("completion_tokens", 0)
+            else:
+                # ModelResponse object
+                usage = getattr(final_response, 'usage', None)
+                if usage:
+                    tokens_in = getattr(usage, 'prompt_tokens', 0) or 0
+                    tokens_out = getattr(usage, 'completion_tokens', 0) or 0
+
+            # Calculate cost if available
+            llm_cost = None
+            try:
+                llm_cost = litellm.completion_cost(completion_response=final_response)
+            except Exception:
+                pass
+
+            execute_sync_callback(
+                'llm_end',
+                model=self.model,
+                tokens_in=tokens_in,
+                tokens_out=tokens_out,
+                cost=llm_cost,
+                latency_ms=llm_latency_ms
+            )
+
             # Execute callbacks and display based on verbose setting
             generation_time_val = time.time() - current_time
             response_content = f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}" if reasoning_content else response_text
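For illustration, the same usage/cost extraction as a standalone helper. This is a sketch only: litellm.completion_cost() is the real litellm call used above, while extract_llm_metrics is a hypothetical name, not part of this codebase:

import litellm

def extract_llm_metrics(final_response):
    """Hypothetical helper mirroring the extraction logic above."""
    if isinstance(final_response, dict):
        usage = final_response.get("usage", {}) or {}
        tokens_in = usage.get("prompt_tokens", 0)
        tokens_out = usage.get("completion_tokens", 0)
    else:
        # ModelResponse-style object with a .usage attribute
        usage = getattr(final_response, "usage", None)
        tokens_in = (getattr(usage, "prompt_tokens", 0) or 0) if usage else 0
        tokens_out = (getattr(usage, "completion_tokens", 0) or 0) if usage else 0

    cost = None
    try:
        # Estimated USD cost; raises for models litellm cannot price.
        cost = litellm.completion_cost(completion_response=final_response)
    except Exception:
        cost = None

    return tokens_in, tokens_out, cost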

src/praisonai-agents/praisonaiagents/llm/openai_client.py

Lines changed: 29 additions & 0 deletions
@@ -1205,6 +1205,35 @@ def chat_completion_with_tools(
             if not final_response:
                 return None
 
+            # Trigger llm_end callback with metrics for debug output
+            llm_end_time = time.perf_counter()
+            llm_latency_ms = (llm_end_time - start_time) * 1000
+
+            # Extract usage info if available
+            usage = getattr(final_response, 'usage', None)
+            tokens_in = getattr(usage, 'prompt_tokens', 0) if usage else 0
+            tokens_out = getattr(usage, 'completion_tokens', 0) if usage else 0
+
+            # Calculate cost if litellm available
+            cost = None
+            try:
+                import litellm
+                if hasattr(final_response, 'model_dump'):
+                    cost = litellm.completion_cost(completion_response=final_response.model_dump())
+                elif isinstance(final_response, dict):
+                    cost = litellm.completion_cost(completion_response=final_response)
+            except Exception:
+                pass  # Cost calculation is optional
+
+            execute_sync_callback(
+                'llm_end',
+                model=model,
+                tokens_in=tokens_in,
+                tokens_out=tokens_out,
+                cost=cost,
+                latency_ms=llm_latency_ms
+            )
+
             # Check for tool calls
             tool_calls = getattr(final_response.choices[0].message, 'tool_calls', None)

src/praisonai-agents/praisonaiagents/output/status.py

Lines changed: 54 additions & 9 deletions
@@ -79,12 +79,14 @@ def __init__(
         redact: bool = True,
         use_color: bool = True,
         show_timestamps: bool = True,  # NEW: control timestamp display
+        show_metrics: bool = False,  # Enable metrics display for debug mode
     ):
         self._file = file or sys.stderr  # Use stderr to not interfere with agent output
         self._format = format
         self._redact = redact
         self._use_color = use_color
         self._show_timestamps = show_timestamps
+        self._show_metrics = show_metrics
         self._console = None
         self._tool_start_times: Dict[str, float] = {}
         self._lock = threading.Lock()  # Per-sink lock for thread safety
@@ -170,23 +172,55 @@ def llm_start(self, model: str = None, agent_name: Optional[str] = None) -> None
         else:
             self._emit_text(f"▸ AI → {context}...", ts, "yellow")
 
-    def llm_end(self, duration_ms: Optional[float] = None, agent_name: Optional[str] = None) -> None:
-        """Record LLM call end."""
+    def llm_end(
+        self,
+        duration_ms: Optional[float] = None,
+        agent_name: Optional[str] = None,
+        model: Optional[str] = None,
+        tokens_in: int = 0,
+        tokens_out: int = 0,
+        cost: Optional[float] = None,
+        latency_ms: Optional[float] = None,
+    ) -> None:
+        """Record LLM call end with optional metrics for debug mode."""
         ts = time.time()
 
-        # Calculate duration if not provided
-        if duration_ms is None and hasattr(self, '_llm_start_time'):
+        # Use latency_ms if provided, otherwise calculate from start time
+        if latency_ms is not None:
+            duration_ms = latency_ms
+        elif duration_ms is None and hasattr(self, '_llm_start_time'):
             start_ts = self._llm_start_time
             if start_ts:
                 duration_ms = (ts - start_ts) * 1000
 
-        duration_str = f" [{_format_duration(duration_ms)}]" if duration_ms else ""
+        # Track session totals for summary
+        if not hasattr(self, '_session_tokens_in'):
+            self._session_tokens_in = 0
+            self._session_tokens_out = 0
+            self._session_cost = 0.0
+            self._session_llm_calls = 0
+
+        self._session_tokens_in += tokens_in
+        self._session_tokens_out += tokens_out
+        if cost:
+            self._session_cost += cost
+        self._session_llm_calls += 1
+
+        # Only show metrics line in debug mode (when show_metrics is enabled)
+        show_metrics = getattr(self, '_show_metrics', False)
 
         if self._format == "jsonl":
-            self._emit_jsonl("llm_end", agent_name=agent_name, timestamp=ts, duration_ms=duration_ms)
-        else:
-            prefix = f"[{agent_name}] " if agent_name else ""
-            self._emit_text(f"{prefix}✓ LLM responded{duration_str}", ts, "green")
+            self._emit_jsonl("llm_end", agent_name=agent_name, timestamp=ts,
+                             duration_ms=duration_ms, model=model,
+                             tokens_in=tokens_in, tokens_out=tokens_out, cost=cost)
+        elif show_metrics and (tokens_in > 0 or tokens_out > 0):
+            # Debug mode: show metrics line
+            duration_str = f" [{_format_duration(duration_ms)}]" if duration_ms else ""
+            model_str = model.split('/')[-1] if model else "?"  # Short model name
+            cost_str = f" (~${cost:.4f})" if cost else ""
+
+            metrics_line = f" │ 📊 {model_str}: {tokens_in}→{tokens_out} tokens{cost_str}{duration_str}"
+            self._emit_text(metrics_line, ts, "dim", show_timestamp=False)
 
     def tool_start(self, tool_name: str, tool_args: Optional[Dict[str, Any]] = None, agent_name: Optional[str] = None) -> None:
         """Record tool start - stores info for inline display with result."""
@@ -285,6 +319,7 @@ def enable_status_output(
     redact: bool = True,
    use_color: bool = True,
     show_timestamps: bool = True,
+    show_metrics: bool = False,  # Enable metrics for debug mode
 ) -> StatusOutput:
     """
     Enable actions output mode globally.
@@ -298,6 +333,7 @@ def enable_status_output(
         redact: Whether to redact sensitive data (default: True)
         use_color: Whether to use colored output (default: True)
         show_timestamps: Whether to show timestamps (default: True)
+        show_metrics: Whether to show token/cost metrics (default: False)
 
     Returns:
         StatusOutput instance for programmatic access
@@ -313,6 +349,7 @@ def enable_status_output(
         redact=redact,
         use_color=use_color,
         show_timestamps=show_timestamps,
+        show_metrics=show_metrics,
     )
     _status_output_enabled = True
 
@@ -360,11 +397,19 @@ def on_llm_start(model: str = None, agent_name: str = None, **kwargs):
 
         _status_output.llm_start(model=model, agent_name=agent_name)
 
+    def on_llm_end(model: str = None, tokens_in: int = 0, tokens_out: int = 0, cost: float = None, latency_ms: float = None, **kwargs):
+        """Callback for LLM call completion with metrics."""
+        if not _status_output_enabled or _status_output is None:
+            return
+
+        _status_output.llm_end(model=model, tokens_in=tokens_in, tokens_out=tokens_out, cost=cost, latency_ms=latency_ms)
+
     # Register the callbacks
     register_display_callback('tool_call', on_tool_call)
     register_display_callback('interaction', on_interaction)
     register_display_callback('error', on_error)
     register_display_callback('llm_start', on_llm_start)
+    register_display_callback('llm_end', on_llm_end)
 
     return _status_output
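Since enable_status_output() returns the StatusOutput instance for programmatic access, the new metrics path can be exercised directly. A minimal sketch, assuming the module path implied by the file location; the token and cost values are made up:

from praisonaiagents.output.status import enable_status_output  # assumed module path

status = enable_status_output(show_metrics=True)
status.llm_start(model="gpt-4o-mini")
status.llm_end(model="gpt-4o-mini", tokens_in=104, tokens_out=14,
               cost=0.0021, latency_ms=1500.0)
# Expected metrics line (roughly, per the commit message example):
#  │ 📊 gpt-4o-mini: 104→14 tokens (~$0.0021) [1.5s]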

src/praisonai-agents/praisonaiagents/output/trace.py

Lines changed: 47 additions & 5 deletions
@@ -98,12 +98,32 @@ def llm_start(self, model: str = None) -> None:
         model_str = f" ({model})" if model else ""
         self._emit(f"Calling LLM{model_str}...", "cyan")
 
-    def llm_end(self, duration_ms: float = None) -> None:
-        """Record LLM call end."""
-        if duration_ms is None and self._llm_start_time:
+    def llm_end(
+        self,
+        duration_ms: float = None,
+        model: str = None,
+        tokens_in: int = 0,
+        tokens_out: int = 0,
+        cost: float = None,
+    ) -> None:
+        """Record LLM call end with optional metrics."""
+        # If duration_ms was passed and is positive, use it directly
+        # Otherwise calculate from internal tracking
+        if duration_ms is not None and duration_ms > 0:
+            pass  # Use the passed value
+        elif self._llm_start_time:
             duration_ms = (time.time() - self._llm_start_time) * 1000
-        duration_str = f" ({duration_ms/1000:.1f}s)" if duration_ms else ""
-        self._emit(f"LLM responded{duration_str}", "green")
+
+        duration_str = f" [{duration_ms/1000:.1f}s]" if duration_ms and duration_ms > 0 else ""
+
+        # Show metrics if tokens are available
+        if tokens_in > 0 or tokens_out > 0:
+            model_str = model.split('/')[-1] if model else "?"
+            cost_str = f" (~${cost:.4f})" if cost and cost > 0 else ""
+            metrics_line = f" │ 📊 {model_str}: {tokens_in}→{tokens_out} tokens{cost_str}{duration_str}"
+            self._emit(metrics_line, "dim")
+        else:
+            self._emit(f"LLM responded{duration_str}", "green")
         self._llm_start_time = None
 
     def tool_start(self, tool_name: str, tool_args: Dict[str, Any] = None) -> None:
@@ -266,10 +286,32 @@ def on_error(message: str = None, **kwargs):
         if message:
             _trace_output.error(message)
 
+    def on_llm_start(model: str = None, agent_name: str = None, **kwargs):
+        """Callback for LLM call start."""
+        if not _trace_output_enabled or _trace_output is None:
+            return
+
+        _trace_output.llm_start(model=model)
+
+    def on_llm_end(model: str = None, tokens_in: int = 0, tokens_out: int = 0, cost: float = None, latency_ms: float = None, **kwargs):
+        """Callback for LLM call completion with optional metrics."""
+        if not _trace_output_enabled or _trace_output is None:
+            return
+
+        _trace_output.llm_end(
+            duration_ms=latency_ms,
+            model=model,
+            tokens_in=tokens_in,
+            tokens_out=tokens_out,
+            cost=cost
+        )
+
     # Register the callbacks
     register_display_callback('tool_call', on_tool_call)
     register_display_callback('interaction', on_interaction)
     register_display_callback('error', on_error)
+    register_display_callback('llm_start', on_llm_start)
+    register_display_callback('llm_end', on_llm_end)
 
     return _trace_output
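The new 'llm_end' event is also available to user code through the same callback registry. A sketch, assuming register_display_callback is exported from the top-level package (the import location is an assumption); the keyword arguments match what execute_sync_callback passes in this commit:

from praisonaiagents import register_display_callback  # assumed export location

def log_llm_metrics(model=None, tokens_in=0, tokens_out=0,
                    cost=None, latency_ms=None, **kwargs):
    # Receives the same payload the status/trace sinks consume above.
    latency = f"{latency_ms:.0f}ms" if latency_ms is not None else "n/a"
    print(f"{model}: {tokens_in}->{tokens_out} tokens, cost={cost}, latency={latency}")

register_display_callback('llm_end', log_llm_metrics)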
