-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathfeedback_summarizer.py
More file actions
134 lines (100 loc) · 4.54 KB
/
feedback_summarizer.py
File metadata and controls
134 lines (100 loc) · 4.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""Feedback summarization agent for condensing validation errors and feedback.
This agent summarizes validation errors and evaluation/assessment feedback
into concise, actionable points for the annotation agent.
"""
import logging
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import HumanMessage, SystemMessage
from src.agents.state import HedAnnotationState
logger = logging.getLogger(__name__)
class FeedbackSummarizer:
    """Agent that summarizes validation errors and feedback.

    Uses a cheap, fast model to condense verbose error messages and
    feedback into concise, actionable summaries for the annotation agent.
    """

    # Sentinel returned by _build_user_prompt when the state carries no
    # actionable feedback; summarize() short-circuits on it.
    _NO_FEEDBACK = "No feedback to summarize."

    def __init__(self, llm: BaseChatModel) -> None:
        """Initialize the feedback summarizer.

        Args:
            llm: Language model for summarization (intended to be a cheap,
                fast model, though any chat model works).
        """
        self.llm = llm

    def _build_system_prompt(self) -> str:
        """Build the system prompt for feedback summarization.

        Returns:
            System prompt string
        """
        return """You are a feedback summarizer for HED annotation generation.

Your task: Condense validation errors and feedback into concise, actionable points.

Guidelines:
1. Extract only the KEY issues that need fixing
2. Remove verbose error messages and stack traces
3. Group similar errors together
4. Use bullet points for clarity
5. Keep it under 100 words total
6. Focus on WHAT to fix, not technical details

Example 1 - Tag errors:
Input: "TAG_EXTENSION_INVALID: 'Red' does not have 'Property' as its parent..."
Output:
- Remove parent paths from existing tags (use Red, Circle - NOT Property/Red, Item/Circle)

Example 2 - Semantic grouping:
Input: "GROUPING: needs-improvement. Properties like Red and Circle should be grouped together..."
Output:
- Group object properties together: (Red, Circle) not Red, Circle
- Nest agent-action relationships: ((Agent), (Action, (Object)))

Be direct and actionable."""

    def _build_user_prompt(self, state: HedAnnotationState) -> str:
        """Build the user prompt with errors and feedback.

        Only still-actionable feedback is included: evaluation feedback is
        skipped once ``is_faithful`` is set, and assessment feedback is
        skipped once ``is_complete`` is set.

        Args:
            state: Current annotation workflow state

        Returns:
            User prompt string, or the ``_NO_FEEDBACK`` sentinel when there
            is nothing to summarize.
        """
        feedback_parts = []

        # Validation errors: use the augmented version, which carries
        # remediation guidance intended for the LLM (not the raw,
        # user-facing errors).
        if state.get("validation_errors_augmented"):
            errors = "\n".join(state["validation_errors_augmented"])
            feedback_parts.append(f"VALIDATION ERRORS:\n{errors}")

        # Evaluation feedback is only relevant while the annotation is not
        # yet judged faithful.
        if state.get("evaluation_feedback") and not state.get("is_faithful"):
            feedback_parts.append(f"EVALUATION FEEDBACK:\n{state['evaluation_feedback']}")

        # Assessment feedback is only relevant while the annotation is not
        # yet judged complete.
        if state.get("assessment_feedback") and not state.get("is_complete"):
            feedback_parts.append(f"ASSESSMENT FEEDBACK:\n{state['assessment_feedback']}")

        if not feedback_parts:
            return self._NO_FEEDBACK

        return (
            "\n\n".join(feedback_parts) + "\n\nSummarize the above into concise, actionable points:"
        )

    async def summarize(self, state: HedAnnotationState) -> dict:
        """Summarize validation errors and feedback.

        Args:
            state: Current annotation workflow state

        Returns:
            State update with summarized feedback. Only fields that actually
            contributed to the prompt are replaced with the summary; excluded
            or absent fields are reset to their empty value. Returns ``{}``
            when there is no feedback to summarize.

        Raises:
            Exception: Re-raises any error from the LLM invocation after
                logging it.
        """
        system_prompt = self._build_system_prompt()
        user_prompt = self._build_user_prompt(state)

        # Nothing actionable in the state -- make no state update.
        if user_prompt == self._NO_FEEDBACK:
            return {}

        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=user_prompt),
        ]
        try:
            response = await self.llm.ainvoke(messages)
        except Exception as e:
            logger.error("Feedback summarization LLM invocation failed: %s", e, exc_info=True)
            raise

        content = response.content
        # Some providers return structured (non-str) content; coerce to str.
        summarized_feedback = content.strip() if isinstance(content, str) else str(content)

        # Replace verbose feedback with the summary -- but only for fields
        # that were actually included in the prompt (mirroring the inclusion
        # conditions in _build_user_prompt). Previously, feedback excluded
        # because is_faithful/is_complete was set would be overwritten with
        # a summary that never covered it. Only the augmented (LLM-facing)
        # validation errors are replaced, never the raw user-facing ones.
        return {
            "validation_errors_augmented": (
                [summarized_feedback] if state.get("validation_errors_augmented") else []
            ),
            "evaluation_feedback": (
                summarized_feedback
                if state.get("evaluation_feedback") and not state.get("is_faithful")
                else ""
            ),
            "assessment_feedback": (
                summarized_feedback
                if state.get("assessment_feedback") and not state.get("is_complete")
                else ""
            ),
        }