|
| 1 | +// SPDX-FileCopyrightText: 2025 Deutsche Telekom AG and others |
| 2 | +// |
| 3 | +// SPDX-License-Identifier: Apache-2.0 |
| 4 | + |
| 5 | +package org.eclipse.lmos.arc.assistants.support.filters |
| 6 | + |
| 7 | +import org.eclipse.lmos.adl.server.agents.extensions.RESPONSE_GUIDE_RETRY_REASON |
| 8 | +import org.eclipse.lmos.arc.agents.conversation.Conversation |
| 9 | +import org.eclipse.lmos.arc.agents.conversation.ConversationMessage |
| 10 | +import org.eclipse.lmos.arc.agents.dsl.AgentOutputFilter |
| 11 | +import org.eclipse.lmos.arc.agents.dsl.OutputFilterContext |
| 12 | +import org.eclipse.lmos.arc.agents.dsl.extensions.getCurrentUseCases |
| 13 | +import org.eclipse.lmos.arc.agents.dsl.extensions.llm |
| 14 | +import org.eclipse.lmos.arc.agents.dsl.get |
| 15 | +import org.eclipse.lmos.arc.agents.retry |
| 16 | +import org.eclipse.lmos.arc.core.getOrThrow |
| 17 | +import org.slf4j.LoggerFactory |
| 18 | + |
/**
 * An [AgentOutputFilter] that extracts "MUST" instructions from the processed use case and asks an LLM
 * to verify that the Assistant's response complies with them.
 *
 * Flow of [filter]:
 * 1. Look up the processed text of the current use case; if anything is missing the message is returned as is.
 * 2. Take the text after the `## Solution` heading, split it into sentences and keep those containing [keyword].
 * 3. Ask the LLM to judge the response against those sentences, using the conversation transcript.
 * 4. On `PASS` the message is returned unchanged. Otherwise the text following `Fixed Response:` in the verdict
 *    replaces the message content. If the verdict carries no usable fixed response, [retry] is requested.
 *
 * @param keyword word marking a mandatory sentence. It is matched literally (regex metacharacters are escaped),
 * case-sensitively and as a whole word.
 * @param retryMax value passed as `max` to [retry] when no fixed response could be extracted.
 */
class MustFeature(private val keyword: String = "MUST", private val retryMax: Int = 3) : AgentOutputFilter {

    private val log = LoggerFactory.getLogger(this::class.java)

    /** Splits text on whitespace that follows sentence-ending punctuation. Compiled once per instance. */
    private val sentenceBoundary = Regex("(?<=[.!?])\\s+")

    /** Whole-word match of [keyword]; escaped so that characters such as `(` or `+` cannot break the pattern. */
    private val keywordPattern = Regex("\\b${Regex.escape(keyword)}\\b")

    /**
     * Verifies [message] against the MUST instructions of the current use case.
     *
     * @param message the Assistant response to check.
     * @param context filter context used to read the use cases and the conversation, call the LLM and request a retry.
     * @return [message] unchanged when there is nothing to verify or the verification passes; otherwise a copy
     * updated with the fixed response proposed by the evaluator.
     * @throws Exception whatever [getOrThrow] raises when the verification LLM call fails.
     */
    override suspend fun filter(
        message: ConversationMessage,
        context: OutputFilterContext,
    ): ConversationMessage {
        val useCases = context.getCurrentUseCases() ?: return message
        val currentUseCaseId = useCases.currentUseCaseId ?: return message
        val processedUseCasesText = useCases.processedUseCaseMap[currentUseCaseId] ?: return message

        val mustInstructions = extractMustInstructions(processedUseCasesText)
        if (mustInstructions.isEmpty()) {
            return message
        }

        // The separator only goes *between* items, hence the extra "- " in the log line below.
        val instructionsText = mustInstructions.joinToString("\n- ")
        log.info("Verifying MUST instructions:\n- $instructionsText")

        val transcriptText = context.get<Conversation>().transcript
            .joinToString("\n") { "${it.javaClass.simpleName}: ${it.content}" }

        // NOTE(review): trimIndent() runs on the already interpolated string. Interpolated values that span
        // several lines start at column 0, so in that case no indentation is stripped from the prompt.
        val verificationResult = context.llm(
            system = """
                You are a Quality Assurance Evaluator.
                Your role is to rigorously assess whether an Assistant’s responses comply with all required "MUST" instructions.
                Use the full conversation history to determine compliance.

                ----

                ## Evaluation Instructions
                You must evaluate the Agent’s response against the following mandatory requirements:

                MUST Instructions:
                ```
                $instructionsText
                ```

                Conversation History:
                ```
                $transcriptText
                AssistantMessage: ${message.content}
                ```

                Evaluate whether the Assistant Responses complies with the MUST instructions.

                ----

                ## Evaluation Process

                1. Instruction Decomposition
                Break down $instructionsText into distinct, testable requirements.
                Treat each "MUST" instruction as independently mandatory.

                2. Contextual Validation
                Use the full conversation history to determine:
                - Whether prior constraints apply.
                - Whether the response contradicts earlier instructions.
                - Whether required context-dependent behavior was followed.

                3. Strict Compliance Check
                Every MUST instruction must be fully satisfied.
                Partial compliance = failure.
                Implicit or assumed compliance is not acceptable.
                If any instruction is ambiguous, interpret it conservatively.

                4. Failure Detection Rules
                Missing required elements = failure.
                Format violations = failure.
                Tone/style violations (if specified as MUST) = failure.
                Logical contradictions = failure.
                Ignoring conversation context = failure.

                ----

                ## Output Rules (Critical)

                If all MUST instructions are fully satisfied, output exactly:
                ```
                PASS
                ```

                If any MUST instruction is violated, output:
                A concise but specific explanation of:
                - Which instruction was violated
                - Why it was violated
                - The final answer from the Assistant rephrased to comply with the MUST instructions so that it can be to sent to the user.
                - Do NOT output "PASS" in this case.
                - Do NOT include praise, soft language, or meta commentary.

                ----

                ## Output Format

                If PASS:
                ```
                PASS
                ```

                If FAIL:
                ```
                FAIL

                Issue:
                - [Instruction violated]
                - Explanation: [Why it fails]
                - Fixed Response: [The final answer from the Assistant rephrased to comply with the MUST instructions so that it can be to sent to the user.]
                ```

                **Important*: The Fixed Response MUST be ready to be sent directly to the user.
                - It MUST match the tone, style, and format requirements specified in the MUST instructions.
                - The Fixed Response should not include any explanations, apologies, or meta commentary.

            """.trimIndent(),
            user = "Verify the response.",
        ).getOrThrow().content

        if (isPass(verificationResult)) {
            log.info("MustFeature verification passed.")
            return message
        }

        log.warn("MustFeature verification failed: $verificationResult")
        val fixedResponse = extractFixedResponse(verificationResult)

        if (fixedResponse.isNotBlank()) {
            log.info("Updating response with fixed version from verification: $fixedResponse")
            return message.update(fixedResponse)
        }

        // No usable fixed response in the verdict: ask for the answer to be regenerated instead.
        context.retry(
            max = retryMax,
            details = mapOf("error" to "The following instructions must be followed: $instructionsText"),
            reason = RESPONSE_GUIDE_RETRY_REASON,
        )

        // Reached only if retry() returns normally — presumably once the retry budget is used up; TODO confirm.
        return message
    }

    /**
     * Returns the trimmed sentences found after the `## Solution` heading of [useCaseText] that contain
     * [keyword] as a whole word. Yields an empty list when the heading is absent.
     */
    private fun extractMustInstructions(useCaseText: String): List<String> =
        useCaseText
            .substringAfter("## Solution", "")
            .split(sentenceBoundary)
            .filter { keywordPattern.containsMatchIn(it) }
            .map { it.trim() }

    /** True when [verificationResult] is just `PASS`, ignoring code fences, double quotes, whitespace and case. */
    private fun isPass(verificationResult: String): Boolean =
        stripWrappers(verificationResult).trim().uppercase() == "PASS"

    /**
     * Returns the text following the `Fixed Response:` marker, without code fences and double quotes.
     *
     * An empty string is returned when the marker is missing, so that the evaluator's own explanation is
     * never mistaken for a user-facing answer. Trimming happens after the wrappers are removed so that the
     * whitespace they enclosed does not survive.
     */
    private fun extractFixedResponse(verificationResult: String): String =
        stripWrappers(verificationResult.substringAfter("Fixed Response:", "")).trim()

    /** Removes Markdown code fences and double-quote characters from [text]. */
    private fun stripWrappers(text: String): String = text.replace("```", "").replace("\"", "")
}
0 commit comments