dataeval_dingo/dingo/model/llm/agent/agent_hallucination.py at dev · shijinpjlab/dataeval_dingo · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
"""
Agent-Based Hallucination Detection

This module provides an enhanced hallucination detector that uses web search to verify
factual claims when context is not provided. It extends the standard hallucination
detection with adaptive context gathering capabilities.

Key Features:
- Automatic fallback to web search when context is missing
- Claim extraction and individual verification
- Multi-source fact checking
- Transparent reasoning trails
- Backward compatible with existing LLMHallucination
"""

import json
from typing import Any, Dict, List

from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail, QualityLabel
from dingo.model import Model
from dingo.model.llm.agent.base_agent import BaseAgent
from dingo.utils import log


@Model.llm_register("AgentHallucination")
class AgentHallucination(BaseAgent):
    """
    Agent-based hallucination detector with adaptive context gathering.

    Implementation Pattern: Imperative (Custom Workflow)
    ===================================================

    This agent uses a fully custom workflow with explicit control over each step:
    claim extraction → web search → context synthesis → evaluation. Unlike framework-
    driven agents, this pattern provides complete control over the execution flow and
    can compose with existing Dingo evaluators.

    Key Characteristics:
    -------------------
    - Implements custom `eval()` method with explicit workflow logic
    - Manually calls `execute_tool()` for web search operations
    - Manually calls `send_messages()` for LLM interactions
    - Can delegate to existing evaluators (e.g., LLMHallucination)
    - Full control over execution flow and error handling

    Workflow Steps:
    --------------
    1. Check if context is available in input data
    2. If context exists: Delegate to LLMHallucination evaluator
    3. If context missing:
       a. Extract factual claims from response (LLM call)
       b. Search web for each claim (Tavily tool)
       c. Synthesize context from search results (no LLM call: the search
          answers and top result snippets are collected as-is)
       d. Evaluate with synthesized context (LLMHallucination)

    When to Use This Pattern:
    ------------------------
    ✅ Need fine-grained control over workflow steps
    ✅ Want to compose with existing Dingo evaluators
    ✅ Prefer explicit over implicit behavior
    ✅ Have domain-specific workflow requirements
    ✅ Need to implement conditional logic between steps

    When NOT to Use:
    ---------------
    ❌ Want framework-managed multi-step reasoning
    ❌ Prefer declarative agent configuration
    ❌ Need rapid prototyping with minimal code
    ❌ Complex reasoning that benefits from ReAct pattern

    See Also:
    --------
    - docs/agent_development_guide.md - Comprehensive agent development guide
    - AgentFactCheck - LangChain framework pattern for comparison
    - LLMHallucination - Base evaluator used for delegation

    Configuration Example:
    {
        "name": "AgentHallucination",
        "config": {
            "key": "your-openai-api-key",
            "api_url": "https://api.openai.com/v1",
            "model": "gpt-4.1-mini-2025-04-14",
            "parameters": {
                "agent_config": {
                    "max_iterations": 3,
                    "tools": {
                        "tavily_search": {
                            "api_key": "your-tavily-api-key",
                            "max_results": 5,
                            "search_depth": "advanced"
                        }
                    }
                }
            }
        }
    }
    """

    # Metadata for documentation
    _metric_info = {
        "category": "SFT Data Assessment Metrics - Agent-Enhanced",
        "metric_name": "AgentHallucination",
        "description": "Agent-based hallucination detection with automatic web search for missing context",
        "features": [
            "Automatic context gathering via web search",
            "Factual claim extraction",
            "Multi-source verification",
            "Transparent reasoning trails"
        ]
    }

    available_tools = ["tavily_search"]
    # NOTE(review): max_iterations and threshold are not read by any method of
    # this class; presumably they are consumed by BaseAgent / the delegated
    # evaluator - confirm before removing.
    max_iterations = 3
    threshold = 0.5

    # Upper bound on the number of claims searched per item (one web search each).
    _MAX_CLAIMS = 5
    # Number of top search hits per claim whose content is kept as context.
    _MAX_SOURCES_PER_CLAIM = 2

    # Claim extraction prompt. The instruction sentence and the "Return format"
    # example must describe the same shape (an object with a "claims" array).
    CLAIM_EXTRACTION_PROMPT = """You are a precise claim extractor. Extract all factual claims from the given text.

A factual claim is a statement that can be verified as true or false (e.g., "Paris is the capital of France", "Einstein won the Nobel Prize in 1921").

Do NOT include:
- Opinions or subjective statements
- Questions
- Procedural instructions
- Generic statements that cannot be fact-checked

Return ONLY a JSON object with a single "claims" key whose value is an array of claim strings. If no factual claims exist, use an empty array.

Text: {content}

Return format:
{{"claims": ["claim 1", "claim 2", ...]}}
"""

    # NOTE(review): CONTEXT is listed as required although eval() treats it as
    # optional; verify how the framework enforces _required_fields.
    _required_fields = [RequiredField.CONTENT, RequiredField.CONTEXT]

    @classmethod
    def eval(cls, input_data: Data) -> EvalDetail:
        """
        Main evaluation method with intelligent context handling.

        Workflow:
        1. Check if context is provided
        2. If yes: Use standard LLMHallucination
        3. If no: Execute agent workflow (claim extraction + web search)
        4. Return evaluation with provenance information

        Args:
            input_data: Data object with content and optional context

        Returns:
            EvalDetail with hallucination evaluation results
        """
        if cls._has_context(input_data):
            log.info(f"{cls.__name__}: Context provided, using LLMHallucination")
            return cls._eval_with_context(input_data)

        log.info(f"{cls.__name__}: No context, using web search agent workflow")
        return cls._eval_with_web_search(input_data)

    @classmethod
    def _make_eval_detail(cls, status: bool, label: List[Any], reason: List[str]) -> EvalDetail:
        """
        Build an EvalDetail for this metric with the given outcome fields.

        Args:
            status: Value stored in ``EvalDetail.status``
            label: Value stored in ``EvalDetail.label``
            reason: Value stored in ``EvalDetail.reason``

        Returns:
            EvalDetail whose metric is this class's name
        """
        detail = EvalDetail(metric=cls.__name__)
        detail.status = status
        detail.label = label
        detail.reason = reason
        return detail

    @classmethod
    def _has_context(cls, input_data: Data) -> bool:
        """
        Check if input data has usable context.

        Args:
            input_data: Data object to check

        Returns:
            True if a truthy ``context`` attribute, or a truthy
            ``raw_data['context']`` entry, is present
        """
        # Direct context attribute takes precedence
        if getattr(input_data, 'context', None):
            return True

        # Fall back to the raw record the Data object was built from
        raw_data = getattr(input_data, 'raw_data', None)
        if raw_data and 'context' in raw_data and raw_data['context']:
            return True

        return False

    @classmethod
    def _eval_with_context(cls, input_data: Data) -> EvalDetail:
        """
        Delegate to existing LLMHallucination when context is available.

        Any exception raised during delegation is caught and converted into a
        DELEGATION_ERROR result rather than propagated.

        Args:
            input_data: Data object with context

        Returns:
            EvalDetail from LLMHallucination with an evaluation-method note
            appended to its reasons, or an error EvalDetail on failure
        """
        try:
            # Imported lazily, inside the method, as in the original design
            from dingo.model.llm.llm_hallucination import LLMHallucination

            # Share configuration with LLMHallucination. This assigns onto the
            # LLMHallucination class itself, so it stays set after this call.
            if hasattr(cls, 'dynamic_config') and cls.dynamic_config:
                LLMHallucination.dynamic_config = cls.dynamic_config

            # Use standard hallucination detection
            result = LLMHallucination.eval(input_data)

            # Add metadata about evaluation method
            if result.reason:
                result.reason.append(
                    "\n💡 Evaluation Method: Standard LLMHallucination (context provided)"
                )
            else:
                result.reason = [
                    "💡 Evaluation Method: Standard LLMHallucination (context provided)"
                ]

            return result

        except Exception as e:
            log.error(f"LLMHallucination delegation failed: {e}")
            return cls._make_eval_detail(
                status=True,
                label=[f"{QualityLabel.QUALITY_BAD_PREFIX}DELEGATION_ERROR"],
                reason=[f"Failed to delegate to LLMHallucination: {str(e)}"],
            )

    @classmethod
    def _eval_with_web_search(cls, input_data: Data) -> EvalDetail:
        """
        Execute agent workflow: extract claims → web search → evaluate.

        Any exception raised inside the workflow is caught and converted into
        an AGENT_ERROR result rather than propagated.

        Args:
            input_data: Data object without context

        Returns:
            EvalDetail with agent-based evaluation
        """
        try:
            # Ensure client is created
            cls.create_client()

            # Step 1: Extract factual claims
            log.info(f"{cls.__name__}: Extracting factual claims")
            claims = cls._extract_claims(input_data)

            # NOTE(review): _extract_claims also returns [] when the LLM call
            # or JSON parsing fails, so such failures are reported as
            # QUALITY_GOOD by this branch.
            if not claims:
                log.info(f"{cls.__name__}: No factual claims found")
                return cls._make_eval_detail(
                    status=False,
                    label=[QualityLabel.QUALITY_GOOD],
                    reason=[
                        "✅ No factual claims detected in response",
                        "💡 Evaluation Method: Agent-based (no claims to verify)"
                    ],
                )

            log.info(f"{cls.__name__}: Extracted {len(claims)} claims")

            # Step 2: Search web for each claim
            log.info(f"{cls.__name__}: Searching web for verification")
            search_results = cls._search_claims(claims)

            # Step 3: Synthesize context from search results
            synthesized_context = cls._synthesize_context(search_results)

            if not synthesized_context:
                log.warning(f"{cls.__name__}: Failed to gather web context")
                return cls._make_eval_detail(
                    status=True,
                    label=[f"{QualityLabel.QUALITY_BAD_PREFIX}NO_WEB_CONTEXT"],
                    reason=[
                        "⚠️ Unable to gather sufficient web context for verification",
                        f"📊 Attempted to verify {len(claims)} claims",
                        "💡 Evaluation Method: Agent-based (web search failed)"
                    ],
                )

            # Step 4: Create enriched data with synthesized context
            enriched_data = Data(
                content=input_data.content,
                prompt=getattr(input_data, 'prompt', ''),
                context=synthesized_context
            )

            # Step 5: Evaluate with standard method.
            # NOTE(review): the delegated result carries the "(context
            # provided)" method note even though the context was synthesized.
            log.info(f"{cls.__name__}: Evaluating with synthesized context")
            result = cls._eval_with_context(enriched_data)

            # Step 6: Add agent provenance information
            agent_info = [
                "\n" + "=" * 60,
                "🤖 Agent-Based Evaluation Details",
                "=" * 60,
                f"📝 Factual Claims Extracted: {len(claims)}",
                f"🔍 Web Searches Performed: {len(search_results)}",
                f"📚 Context Sources Synthesized: {len(synthesized_context)}",
                "",
                "💡 Evaluation Method: Agent-based with web search",
                "   • Claims extracted from response",
                "   • Each claim verified via Tavily web search",
                "   • Context synthesized from search results",
                "   • Standard hallucination detection applied"
            ]

            if result.reason:
                result.reason.extend(agent_info)
            else:
                result.reason = agent_info

            return result

        except Exception as e:
            log.error(f"{cls.__name__} agent workflow failed: {e}")
            return cls._make_eval_detail(
                status=True,
                label=[f"{QualityLabel.QUALITY_BAD_PREFIX}AGENT_ERROR"],
                reason=[
                    f"❌ Agent workflow failed: {str(e)}",
                    "💡 Evaluation Method: Agent-based (error occurred)"
                ],
            )

    @classmethod
    def _parse_claims(cls, response: str) -> List[str]:
        """
        Parse the claim-extraction LLM response into a list of claim strings.

        Accepts both the documented object form ``{"claims": [...]}`` and a
        bare JSON array ``[...]``, optionally wrapped in a markdown code fence.
        Entries that are not strings, or are blank, are dropped.

        Args:
            response: Raw text returned by the LLM

        Returns:
            List of stripped, non-empty claim strings (not truncated)

        Raises:
            json.JSONDecodeError: If the unwrapped text is not valid JSON
        """
        # Handle markdown code blocks
        text = response.strip()
        if text.startswith("```json"):
            text = text[7:]
        if text.startswith("```"):
            text = text[3:]
        if text.endswith("```"):
            text = text[:-3]
        text = text.strip()

        payload = json.loads(text)

        # A bare array is taken as the claim list itself
        claims = payload.get('claims', []) if isinstance(payload, dict) else payload

        if not isinstance(claims, list):
            log.warning("Claims extraction returned non-list")
            return []

        # Non-string entries (numbers, null, nested objects) are skipped
        # instead of raising and discarding the valid claims alongside them.
        return [c.strip() for c in claims if isinstance(c, str) and c.strip()]

    @classmethod
    def _extract_claims(cls, input_data: Data) -> List[str]:
        """
        Extract factual claims from response using LLM.

        Best-effort: any failure (LLM call, JSON parsing) is logged and an
        empty list is returned.

        Args:
            input_data: Data object with content

        Returns:
            List of factual claim strings, at most ``_MAX_CLAIMS`` long
        """
        response = None
        try:
            # Build claim extraction prompt
            prompt = cls.CLAIM_EXTRACTION_PROMPT.format(
                content=input_data.content
            )

            # Call LLM
            messages = [{"role": "user", "content": prompt}]
            response = cls.send_messages(messages)

            claims = cls._parse_claims(response)

            # Limit the number of claims to avoid excessive API calls
            return claims[:cls._MAX_CLAIMS]

        except json.JSONDecodeError as e:
            log.error(f"Failed to parse claims JSON: {e}")
            log.debug(f"Response was: {response}")
            return []
        except Exception as e:
            log.error(f"Claim extraction failed: {e}")
            return []

    @classmethod
    def _search_claims(cls, claims: List[str]) -> List[Dict[str, Any]]:
        """
        Search web for each claim using Tavily.

        A failing search does not abort the loop; it is recorded as a
        ``{'success': False, 'query': ..., 'error': ...}`` entry instead.

        Args:
            claims: List of factual claims to verify

        Returns:
            List of search results, one entry per claim in input order
        """
        results = []

        for claim in claims:
            try:
                result = cls.execute_tool('tavily_search', query=claim)
                results.append(result)
            except Exception as e:
                log.warning(f"Search failed for claim '{claim}': {e}")
                results.append({
                    'success': False,
                    'query': claim,
                    'error': str(e)
                })

        return results

    @classmethod
    def _synthesize_context(cls, search_results: List[Dict[str, Any]]) -> List[str]:
        """
        Synthesize context from web search results.

        For every successful search the ``answer`` (if any) is kept, followed
        by the content of the first ``_MAX_SOURCES_PER_CLAIM`` result items,
        each suffixed with its source URL. Malformed entries (non-dict
        results, missing/None ``results`` or ``content``) are skipped.

        Args:
            search_results: List of Tavily search results

        Returns:
            List of context strings
        """
        contexts: List[str] = []

        for result in search_results:
            # Skip failed searches and anything that is not a result mapping
            if not isinstance(result, dict) or not result.get('success'):
                continue

            # Add AI-generated answer if available
            answer = result.get('answer')
            if answer:
                contexts.append(answer)

            # 'results' may be absent or explicitly None in the payload
            items = result.get('results')
            if not isinstance(items, (list, tuple)):
                continue

            # Add top search result contents
            for search_item in items[:cls._MAX_SOURCES_PER_CLAIM]:
                if not isinstance(search_item, dict):
                    continue

                content = search_item.get('content')
                if not isinstance(content, str):
                    continue

                content = content.strip()
                if content:
                    # Add source attribution
                    source = search_item.get('url') or 'Unknown'
                    contexts.append(f"{content} [Source: {source}]")

        return contexts

    @classmethod
    def plan_execution(cls, input_data: Data) -> List[Dict[str, Any]]:
        """
        Define execution plan (not used in current implementation).

        The current implementation uses a direct workflow in _eval_with_web_search
        rather than the generic plan_execution framework.

        Args:
            input_data: Data object (ignored)

        Returns:
            Always an empty list
        """
        # Not used - we implement custom workflow in eval()
        return []

    @classmethod
    def aggregate_results(cls, input_data: Data, results: List[Any]) -> EvalDetail:
        """
        Aggregate results (not used in current implementation).

        The current implementation uses a direct workflow in _eval_with_web_search
        rather than the generic aggregate_results framework.

        Args:
            input_data: Data object (ignored)
            results: Step results (ignored)

        Returns:
            An EvalDetail carrying only this metric's name
        """
        # Not used - we implement custom workflow in eval()
        return EvalDetail(metric=cls.__name__)