fix: better prompt to detect goal, handle end of session from agents and improve logs

xavidop · xavidop · commit 55a9d9890d86 · 2026-02-25T14:02:10.000+01:00
diff --git a/pkg/test/agent-runner.go b/pkg/test/agent-runner.go
@@ -108,6 +108,17 @@ func (atr *AgentTestRunner) ExecuteAgentTest(ctx context.Context, agentTest test
 			return fmt.Errorf("failed to interact with Voiceflow at step %d: %w", currentStep, err)
 		}
 
+		// Check if the Voiceflow agent's session has ended
+		if ended, reason := atr.HasEndResponse(voiceflowResponse); ended {
+			atr.addLog(fmt.Sprintf("Voiceflow agent session ended (reason: %s)", reason))
+			// Still extract and log the last message if available
+			lastMessage := atr.ExtractMessage(voiceflowResponse)
+			if lastMessage != "" {
+				atr.addLog(fmt.Sprintf("Voiceflow agent final message: %s", lastMessage))
+			}
+			break
+		}
+
 		// Get the next action from the AI agent
 		agentResponse, err = atr.getNextAction(voiceflowResponse, agentTest.Goal, currentStep+1, agentTest.MaxSteps)
 		if err != nil {
diff --git a/pkg/test/common.go b/pkg/test/common.go
@@ -83,7 +83,8 @@ func (br *BaseRunner) InteractWithVoiceflow(messageType, message, environmentNam
 	return br.ProcessResponses(responses), nil
 }
 
-// ProcessResponses handles multiple responses by concatenating messages
+// ProcessResponses handles multiple responses by concatenating messages.
+// It preserves "end" type responses so callers can detect session termination.
 func (br *BaseRunner) ProcessResponses(responses []interact.InteractionResponse) []interact.InteractionResponse {
 	if len(responses) == 0 {
 		br.AddLog("No response received from Voiceflow")
@@ -93,21 +94,43 @@ func (br *BaseRunner) ProcessResponses(responses []interact.InteractionResponse)
 	// If there are multiple responses, concatenate their messages
 	if len(responses) > 1 {
 		var concatenatedMessage strings.Builder
+		var endResponse *interact.InteractionResponse
+		firstMessageIdx := -1
+
 		for i, response := range responses {
+			// Preserve end response for session termination detection
+			if response.Type == "end" {
+				endResponse = &responses[i]
+				continue
+			}
 			if message, ok := response.Payload["message"].(string); ok && message != "" {
-				if i > 0 {
+				if firstMessageIdx == -1 {
+					firstMessageIdx = i
+				} else {
 					concatenatedMessage.WriteString(" ")
 				}
 				concatenatedMessage.WriteString(message)
 			}
 		}
 
-		// Update the first response with the concatenated message
-		if concatenatedMessage.Len() > 0 {
-			responses[0].Payload["message"] = concatenatedMessage.String()
+		var result []interact.InteractionResponse
+
+		// Update the first message response with the concatenated message
+		if firstMessageIdx >= 0 && concatenatedMessage.Len() > 0 {
+			responses[firstMessageIdx].Payload["message"] = concatenatedMessage.String()
+			result = append(result, responses[firstMessageIdx])
+		}
+
+		// Append end response if present so callers can detect session end
+		if endResponse != nil {
+			result = append(result, *endResponse)
+		}
+
+		if len(result) > 0 {
+			return result
 		}
 
-		// Return only the first response with the concatenated message
+		// Fallback: return the original first response
 		return responses[:1]
 	}
 
@@ -124,6 +147,23 @@ func (br *BaseRunner) ExtractMessage(voiceflowResponse []interact.InteractionRes
 	return ""
 }
 
+// HasEndResponse checks if any of the Voiceflow responses contain an "end" type trace,
+// which indicates the agent's session has ended.
+func (br *BaseRunner) HasEndResponse(responses []interact.InteractionResponse) (bool, string) {
+	for _, response := range responses {
+		if response.Type == "end" {
+			reason := ""
+			if response.Payload != nil {
+				if r, ok := response.Payload["reason"].(string); ok {
+					reason = r
+				}
+			}
+			return true, reason
+		}
+	}
+	return false, ""
+}
+
 // IsGoalAchieved uses OpenAI to evaluate if the goal has been achieved
 func (br *BaseRunner) IsGoalAchieved(goal string) (bool, error) {
 	// Build conversation summary
@@ -141,17 +181,22 @@ func (br *BaseRunner) IsGoalAchieved(goal string) (bool, error) {
 		}
 	}
 
-	prompt := fmt.Sprintf(`Analyze the following conversation and determine if the goal has been achieved.
+	prompt := fmt.Sprintf(`Analyze the following conversation between two agents and determine if the stated goal has been achieved or is clearly being fulfilled.
 
 Goal: %s
 
 Conversation:
 %s
 
-Has the goal been achieved? Respond with only "YES" or "NO".`, goal, conversationSummary.String())
+When evaluating, consider:
+- Has the goal been explicitly completed (e.g., a clear confirmation message)?
+- Has the goal been effectively achieved even without an explicit final confirmation? For example, if all required information has been gathered and the action is being processed, or both parties are acting as if the goal is accomplished, consider it achieved.
+- Look at the overall intent and progression of the conversation, not just the last message.
+
+Based on the full conversation context, has the goal been achieved or effectively fulfilled? Respond with only "YES" or "NO".`, goal, conversationSummary.String())
 
 	messages := []ChatMessage{
-		{Role: "system", Content: "You are a helpful assistant that analyzes conversations and determines if goals have been achieved."},
+		{Role: "system", Content: "You are an expert evaluator that analyzes conversations between two agents and determines if a stated goal has been achieved or effectively fulfilled. Consider the overall intent, actions taken, and progression of the conversation. A goal can be considered achieved if the necessary actions have been completed or are clearly being executed, even without an explicit final confirmation message."},
 		{Role: "user", Content: prompt},
 	}
 
diff --git a/pkg/test/voiceflow-agent-runner.go b/pkg/test/voiceflow-agent-runner.go
@@ -128,6 +128,12 @@ func (vatr *VoiceflowAgentTestRunner) ExecuteAgentTest(ctx context.Context, agen
 
 		vatr.addLog(fmt.Sprintf("Step %d", currentStep))
 
+		// Check if the tester agent's session has ended
+		if ended, reason := vatr.HasEndResponse(testerResponse); ended {
+			vatr.addLog(fmt.Sprintf("Tester agent session ended (reason: %s)", reason))
+			break
+		}
+
 		// Get the tester's message and send it to the target agent
 		testerMessage := vatr.ExtractMessage(testerResponse)
 		if testerMessage == "" {
@@ -146,6 +152,17 @@ func (vatr *VoiceflowAgentTestRunner) ExecuteAgentTest(ctx context.Context, agen
 			return fmt.Errorf("failed to interact with target agent at step %d: %w", currentStep, err)
 		}
 
+		// Check if the target agent's session has ended
+		if ended, reason := vatr.HasEndResponse(targetAgentResponse); ended {
+			vatr.addLog(fmt.Sprintf("Target agent session ended (reason: %s)", reason))
+			// Still extract and log the last message if available
+			targetMessage := vatr.ExtractMessage(targetAgentResponse)
+			if targetMessage != "" {
+				vatr.addLog(fmt.Sprintf("Target agent final message: %s", targetMessage))
+			}
+			break
+		}
+
 		targetMessage := vatr.ExtractMessage(targetAgentResponse)
 		vatr.addLog(fmt.Sprintf("Target agent says: %s", targetMessage))