Skip to content

Commit 2a4d879

Browse files
committed
Update how "success metrics" are calculated
1 parent 34fdf68 commit 2a4d879

File tree

1 file changed

+235
-15
lines changed

1 file changed

+235
-15
lines changed

coffee_ws/src/coffee_voice_agent_ui/coffee_voice_agent_ui/widgets/analytics_widget.py

Lines changed: 235 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,49 @@
44
55
Shows session statistics, performance metrics, usage trends,
66
and system analytics for the voice agent system.
7+
8+
ANALYTICS METHODOLOGY:
9+
======================
10+
11+
This widget provides real-time analytics based on actual system data rather than simulated metrics.
12+
It consumes data from the ROS2 bridge which receives events from the voice agent system.
13+
14+
SUCCESS RATE CALCULATION:
15+
------------------------
16+
The success rate is a weighted combination of two key effectiveness metrics:
17+
18+
1. Tool Success Rate (40% weight):
19+
- Measures the percentage of function tool calls that complete successfully
20+
- Based on the per-tool successful_calls vs failed_calls counts reported in tool_data
21+
- Indicates how well the agent can execute requested actions
22+
23+
2. Conversation Success Rate (60% weight):
24+
- Measures the percentage of conversations that end naturally/successfully
25+
- Natural endings: User says goodbye, task completion
26+
- Failed endings: Timeouts, user disconnects, very short interactions (under 30 seconds with no explicit outcome flag)
27+
- Indicates user satisfaction and conversation quality
28+
29+
Combined Formula: (Tool Success × 0.4) + (Conversation Success × 0.6)
30+
31+
FALLBACK BEHAVIOR:
32+
- If only one metric is available, uses that metric with appropriate labeling
33+
- If no effectiveness data available, falls back to connection status as basic health indicator
34+
- Labels indicate data source: "85% (tools)", "92% (conv)", "100% (conn)"
35+
36+
DATA SOURCES:
37+
------------
38+
- agent_data: Connection status, emotion states, conversation counts
39+
- tool_data: Tool usage statistics, success/failure rates, response times
40+
- conversation_data: Session timing, turn counts, conversation outcomes
41+
42+
REAL-TIME UPDATES:
43+
-----------------
44+
- Message rate: Calculated from actual ROS message timestamps
45+
- Popular interactions: Ranked by actual tool usage counts
46+
- Emotion trends: Analyzed from real emotion transition events
47+
- System metrics: Derived from live system performance data
48+
49+
This approach provides meaningful operational insights rather than placeholder metrics.
750
"""
851

952
from datetime import datetime, timedelta
@@ -33,6 +76,10 @@ def __init__(self):
3376
self.connection_events = [] # For health metrics
3477
self.user_speech_events = [] # For interaction tracking
3578

79+
# Success rate tracking - measures actual system effectiveness
80+
self.tool_success_events = [] # Tool completion/failure events
81+
self.conversation_outcomes = [] # Natural vs timeout conversation endings
82+
3683
# Session data - now calculated from real events
3784
self.session_data = {
3885
'conversations_today': 0,
@@ -328,7 +375,7 @@ def _process_agent_data(self, agent_data):
328375
self.emotion_history = self.emotion_history[-100:]
329376

330377
def _process_tool_data(self, tool_data):
331-
"""Process tool usage data"""
378+
"""Process tool usage data and track success/failure rates"""
332379
if 'most_used_tools' in tool_data:
333380
for tool_name, stats in tool_data['most_used_tools']:
334381
self.tool_usage_counts[tool_name] = stats.get('total_calls', 0)
@@ -339,27 +386,72 @@ def _process_tool_data(self, tool_data):
339386
# Keep only recent response times
340387
if len(self.tool_response_times[tool_name]) > 20:
341388
self.tool_response_times[tool_name] = self.tool_response_times[tool_name][-20:]
342-
389+
390+
# Track tool success/failure events for success rate calculation
391+
successful_calls = stats.get('successful_calls', 0)
392+
failed_calls = stats.get('failed_calls', 0)
393+
394+
if successful_calls > 0 or failed_calls > 0:
395+
self.tool_success_events.append({
396+
'timestamp': datetime.now(),
397+
'tool_name': tool_name,
398+
'successful': successful_calls,
399+
'failed': failed_calls,
400+
'success_rate': (successful_calls / (successful_calls + failed_calls)) * 100 if (successful_calls + failed_calls) > 0 else 0
401+
})
402+
403+
# Keep only recent events for analysis
404+
if len(self.tool_success_events) > 100:
405+
self.tool_success_events = self.tool_success_events[-100:]
406+
343407
def _process_conversation_data(self, conversation_data):
344-
"""Process conversation analytics data"""
408+
"""Process conversation analytics data and track conversation outcomes"""
345409
if conversation_data.get('conversation_active'):
346410
if not self.current_session_start:
347411
self.current_session_start = datetime.now()
348412
else:
349413
if self.current_session_start:
350-
# Session ended - record it
414+
# Session ended - determine outcome type
351415
duration = datetime.now() - self.current_session_start
352-
self.conversation_sessions.append({
416+
417+
# Analyze conversation outcome (this could be enhanced with more data from conversation_data)
418+
# For now, we use heuristics based on duration and available data
419+
outcome_type = 'completed' # Default assumption
420+
421+
# Check for timeout indicators
422+
if conversation_data.get('timeout_reached', False):
423+
outcome_type = 'timeout'
424+
elif conversation_data.get('user_disconnected', False):
425+
outcome_type = 'user_disconnect'
426+
elif conversation_data.get('natural_ending', False):
427+
outcome_type = 'natural'
428+
elif duration.total_seconds() < 30: # Very short conversations might be failures
429+
outcome_type = 'failed'
430+
431+
session_record = {
353432
'start': self.current_session_start,
354433
'duration': duration,
355-
'completed': True # Assume completed for now
434+
'outcome': outcome_type,
435+
'turn_count': conversation_data.get('turn_count', 0),
436+
'successful': outcome_type in ['completed', 'natural']
437+
}
438+
439+
self.conversation_sessions.append(session_record)
440+
self.conversation_outcomes.append({
441+
'timestamp': datetime.now(),
442+
'outcome': outcome_type,
443+
'duration': duration,
444+
'successful': outcome_type in ['completed', 'natural']
356445
})
446+
357447
self.current_session_start = None
358448

359-
# Keep only recent sessions
449+
# Keep only recent sessions and outcomes
360450
if len(self.conversation_sessions) > 50:
361451
self.conversation_sessions = self.conversation_sessions[-50:]
362-
452+
if len(self.conversation_outcomes) > 100:
453+
self.conversation_outcomes = self.conversation_outcomes[-100:]
454+
363455
def _calculate_session_metrics(self):
364456
"""Calculate and update session performance metrics"""
365457
# Conversations today
@@ -374,15 +466,82 @@ def _calculate_session_metrics(self):
374466
else:
375467
self.avg_duration_label.setText("--")
376468

377-
# Success rate (based on connection status)
378-
if self.connection_events:
379-
connected_time = sum(1 for event in self.connection_events if event['connected'])
380-
total_events = len(self.connection_events)
381-
success_rate = (connected_time / total_events) * 100 if total_events > 0 else 0
469+
# Success rate - now based on actual system effectiveness
470+
self._calculate_success_rate()
471+
472+
def _calculate_success_rate(self):
473+
"""
474+
Calculate comprehensive success rate based on tool effectiveness and conversation outcomes.
475+
476+
Success Rate Methodology:
477+
- Tool Success Rate (40% weight): Percentage of tool calls that complete successfully
478+
- Conversation Success Rate (60% weight): Percentage of conversations that end naturally/successfully
479+
480+
This provides a meaningful measure of system effectiveness rather than just connectivity.
481+
482+
Success Criteria:
483+
- Tool Success: Per-tool successful_calls vs failed_calls counts from tool_data, summed over the last hour
484+
- Conversation Success: Sessions whose outcome is "completed" or "natural" vs timeouts, user disconnects, and very short sessions
485+
486+
Fallback: If insufficient data, shows connection status as basic health indicator.
487+
"""
488+
489+
tool_success_rate = None
490+
conversation_success_rate = None
491+
492+
# Calculate tool success rate
493+
if self.tool_success_events:
494+
recent_tool_events = [event for event in self.tool_success_events
495+
if (datetime.now() - event['timestamp']).total_seconds() < 3600] # Last hour
496+
497+
if recent_tool_events:
498+
total_successful = sum(event['successful'] for event in recent_tool_events)
499+
total_failed = sum(event['failed'] for event in recent_tool_events)
500+
total_calls = total_successful + total_failed
501+
502+
if total_calls > 0:
503+
tool_success_rate = (total_successful / total_calls) * 100
504+
505+
# Calculate conversation success rate
506+
if self.conversation_outcomes:
507+
recent_outcomes = [outcome for outcome in self.conversation_outcomes
508+
if (datetime.now() - outcome['timestamp']).total_seconds() < 3600] # Last hour
509+
510+
if recent_outcomes:
511+
successful_conversations = sum(1 for outcome in recent_outcomes if outcome['successful'])
512+
total_conversations = len(recent_outcomes)
513+
514+
if total_conversations > 0:
515+
conversation_success_rate = (successful_conversations / total_conversations) * 100
516+
517+
# Combine success rates with weighting
518+
if tool_success_rate is not None and conversation_success_rate is not None:
519+
# Both metrics available - use weighted combination
520+
success_rate = (tool_success_rate * 0.4) + (conversation_success_rate * 0.6)
521+
self.success_rate_label.setText(f"{success_rate:.0f}%")
522+
523+
elif tool_success_rate is not None:
524+
# Only tool data available
525+
success_rate = tool_success_rate
526+
self.success_rate_label.setText(f"{success_rate:.0f}% (tools)")
527+
528+
elif conversation_success_rate is not None:
529+
# Only conversation data available
530+
success_rate = conversation_success_rate
531+
self.success_rate_label.setText(f"{success_rate:.0f}% (conv)")
532+
382533
else:
383-
success_rate = 100.0 if self.current_connection_status else 0.0
534+
# Fallback to connection status when no meaningful data available
535+
if self.connection_events:
536+
connected_time = sum(1 for event in self.connection_events if event['connected'])
537+
total_events = len(self.connection_events)
538+
success_rate = (connected_time / total_events) * 100 if total_events > 0 else 0
539+
self.success_rate_label.setText(f"{success_rate:.0f}% (conn)")
540+
else:
541+
success_rate = 100.0 if self.current_connection_status else 0.0
542+
self.success_rate_label.setText(f"{success_rate:.0f}% (conn)")
384543

385-
self.success_rate_label.setText(f"{success_rate:.0f}%")
544+
# Update progress bar and color coding
386545
self.success_progress.setValue(int(success_rate))
387546

388547
# Color code success rate
@@ -395,6 +554,67 @@ def _calculate_session_metrics(self):
395554

396555
self.success_rate_label.setStyleSheet(f"color: {color};")
397556
self.success_progress.setStyleSheet(f"QProgressBar::chunk {{ background-color: {color}; }}")
557+
558+
def get_success_rate_debug_info(self):
559+
"""
560+
Get detailed information about success rate calculation for debugging/logging.
561+
562+
Returns:
563+
dict: Debug information including component metrics and data sources
564+
"""
565+
debug_info = {
566+
'tool_events_count': len(self.tool_success_events),
567+
'conversation_outcomes_count': len(self.conversation_outcomes),
568+
'connection_events_count': len(self.connection_events),
569+
'tool_success_rate': None,
570+
'conversation_success_rate': None,
571+
'combined_success_rate': None,
572+
'data_source': 'none'
573+
}
574+
575+
# Calculate tool success rate for debug
576+
if self.tool_success_events:
577+
recent_tool_events = [event for event in self.tool_success_events
578+
if (datetime.now() - event['timestamp']).total_seconds() < 3600]
579+
580+
if recent_tool_events:
581+
total_successful = sum(event['successful'] for event in recent_tool_events)
582+
total_failed = sum(event['failed'] for event in recent_tool_events)
583+
total_calls = total_successful + total_failed
584+
585+
if total_calls > 0:
586+
debug_info['tool_success_rate'] = (total_successful / total_calls) * 100
587+
debug_info['tool_successful_calls'] = total_successful
588+
debug_info['tool_failed_calls'] = total_failed
589+
590+
# Calculate conversation success rate for debug
591+
if self.conversation_outcomes:
592+
recent_outcomes = [outcome for outcome in self.conversation_outcomes
593+
if (datetime.now() - outcome['timestamp']).total_seconds() < 3600]
594+
595+
if recent_outcomes:
596+
successful_conversations = sum(1 for outcome in recent_outcomes if outcome['successful'])
597+
total_conversations = len(recent_outcomes)
598+
599+
if total_conversations > 0:
600+
debug_info['conversation_success_rate'] = (successful_conversations / total_conversations) * 100
601+
debug_info['successful_conversations'] = successful_conversations
602+
debug_info['total_conversations'] = total_conversations
603+
604+
# Determine final calculation
605+
if debug_info['tool_success_rate'] is not None and debug_info['conversation_success_rate'] is not None:
606+
debug_info['combined_success_rate'] = (debug_info['tool_success_rate'] * 0.4) + (debug_info['conversation_success_rate'] * 0.6)
607+
debug_info['data_source'] = 'combined'
608+
elif debug_info['tool_success_rate'] is not None:
609+
debug_info['combined_success_rate'] = debug_info['tool_success_rate']
610+
debug_info['data_source'] = 'tools_only'
611+
elif debug_info['conversation_success_rate'] is not None:
612+
debug_info['combined_success_rate'] = debug_info['conversation_success_rate']
613+
debug_info['data_source'] = 'conversations_only'
614+
else:
615+
debug_info['data_source'] = 'connection_fallback'
616+
617+
return debug_info
398618

399619
def _calculate_popular_interactions(self):
400620
"""Calculate and update popular interactions"""

0 commit comments

Comments
 (0)