Skip to content

Commit 6cd7ba3

Browse files
feat: implement langextract observability follow-ups (fixes #1421)
Implements 4 follow-ups after PR #1420 to enhance langextract observability: **Follow-up 1: Richer llm_response content wiring** - Add _extract_llm_response_content() helper in chat_mixin.py - Extract actual message content instead of str(response) for better observability - Improves HTML trace quality by showing actual agent responses **Follow-up 2: LangfuseSink context-emitter bridge** - Add _ContextToActionBridge class for forwarding ContextEvent → ActionEvent - Add context_sink() method to LangfuseSink for ContextTraceSinkProtocol - Update _setup_langfuse_observability to wire both action + context emitters - Enables LangfuseSink to capture rich agent lifecycle events **Follow-up 3: langextract_tools.py tool registration** - Create first-class langextract_extract and langextract_render_file tools - Add to tools/__init__.py TOOL_MAPPINGS for lazy loading - Follows AGENTS.md patterns (decorator-based, lazy imports, optional deps) - Agents can now call langextract functionality directly as tools **Follow-up 4: Documentation updates** - Add comprehensive langextract.mdx in external PraisonAIDocs repo - Cover CLI usage (--observe langextract, render, view), Python API, tools - Created PR: MervinPraison/PraisonAIDocs#162 Architecture: Protocol-driven design per AGENTS.md - core protocols in praisonaiagents, heavy implementations in praisonai wrapper, zero regressions. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-authored-by: MervinPraison <MervinPraison@users.noreply.github.com>
1 parent a6094f5 commit 6cd7ba3

File tree

5 files changed

+348
-6
lines changed

5 files changed

+348
-6
lines changed

src/praisonai-agents/praisonaiagents/agent/chat_mixin.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,33 @@ def _get_display_functions():
5656
class ChatMixin:
5757
"""Mixin providing chat methods for the Agent class."""
5858

59+
def _extract_llm_response_content(self, response) -> Optional[str]:
60+
"""Extract actual message content from LLM response for better observability.
61+
62+
Instead of str(response) which shows the entire ChatCompletion object,
63+
this extracts the actual message text that agents produce.
64+
65+
Args:
66+
response: OpenAI ChatCompletion response object
67+
68+
Returns:
69+
str: The actual message content, or fallback representation
70+
"""
71+
if not response:
72+
return None
73+
74+
try:
75+
# Try to extract the actual message content first
76+
if hasattr(response, 'choices') and response.choices:
77+
choice = response.choices[0]
78+
if hasattr(choice, 'message') and hasattr(choice.message, 'content'):
79+
return choice.message.content
80+
except (AttributeError, IndexError, TypeError):
81+
pass
82+
83+
# Fallback to string representation if extraction fails
84+
return str(response)
85+
5986
def _build_system_prompt(self, tools=None):
6087
"""Build the system prompt with tool information.
6188
@@ -572,7 +599,7 @@ def _chat_completion(self, messages, temperature=1.0, tools=None, stream=True, r
572599
_trace_emitter.llm_response(
573600
self.name,
574601
duration_ms=_duration_ms,
575-
response_content=str(final_response) if final_response else None,
602+
response_content=self._extract_llm_response_content(final_response),
576603
prompt_tokens=_prompt_tokens,
577604
completion_tokens=_completion_tokens,
578605
cost_usd=_cost_usd,

src/praisonai-agents/praisonaiagents/tools/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,10 @@
147147
'Crawl4AITools': ('.crawl4ai_tools', 'Crawl4AITools'),
148148
'crawl4ai_tools': ('.crawl4ai_tools', None),
149149

150+
# Langextract Tools (interactive text analysis)
151+
'langextract_extract': ('.langextract_tools', None),
152+
'langextract_render_file': ('.langextract_tools', None),
153+
150154
# Unified Web Search (auto-fallback across providers)
151155
'search_web': ('.web_search', None),
152156
'web_search': ('.web_search', None), # Alias
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
"""Langextract tools for interactive text analysis and extraction.
2+
3+
Provides first-class tool integration for langextract functionality,
4+
allowing agents to create interactive HTML visualizations from text.
5+
6+
Usage:
7+
from praisonaiagents.tools import langextract_extract
8+
9+
# Agent can call this tool directly
10+
result = langextract_extract(
11+
text="The quick brown fox jumps over the lazy dog.",
12+
extractions=["fox", "dog"]
13+
)
14+
15+
Architecture:
16+
- Follows AGENTS.md tool patterns (decorator-based, lazy imports)
17+
- Protocol-driven design with optional dependencies
18+
- Zero overhead when langextract is not installed
19+
"""
20+
21+
from typing import List, Optional, Dict, Any
22+
from ..approval import require_approval
23+
from .decorator import tool
24+
25+
26+
@tool
def langextract_extract(
    text: str,
    extractions: Optional[List[str]] = None,
    document_id: str = "agent-analysis",
    output_path: Optional[str] = None,
    auto_open: bool = False
) -> Dict[str, Any]:
    """Extract and annotate text using langextract for interactive visualization.

    Creates an interactive HTML document with highlighted extractions that can
    be viewed in a browser. Matching is case-insensitive and non-overlapping
    per snippet. This tool never raises: all failures (empty text, missing
    dependency, render errors) are reported via the returned dict.

    Args:
        text: The source text to analyze and extract from.
        extractions: List of text snippets to highlight in the document.
        document_id: Identifier for the document (used in HTML output).
        output_path: Path to save the HTML file (defaults to a temp file).
        auto_open: Whether to automatically open the HTML file in a browser.

    Returns:
        Dict containing:
        - html_path: Path to the generated HTML file (None on failure)
        - extractions_count: Number of extraction snippets supplied
        - document_id: The document identifier used
        - success: True if successful, False otherwise
        - error: Error message if success is False, else None
    """
    if not text or not text.strip():
        return {
            "success": False,
            "error": "Text cannot be empty",
            "html_path": None,
            "extractions_count": 0,
            "document_id": document_id
        }

    try:
        # Lazy import of the optional dependency — zero overhead when
        # langextract is not installed.
        try:
            import langextract as lx  # type: ignore
        except ImportError:
            return {
                "success": False,
                "error": "langextract is not installed. Install with: pip install langextract",
                "html_path": None,
                "extractions_count": 0,
                "document_id": document_id
            }

        import os

        document = lx.AnnotatedDocument(
            document_id=document_id,
            text=text
        )

        # Lowercase once outside the loop instead of per find() call.
        # NOTE(review): assumes str.lower() preserves string length (true for
        # ASCII; a handful of Unicode code points expand) so offsets in
        # `lowered` map 1:1 onto `text` — confirm if non-ASCII input matters.
        lowered = text.lower()
        extractions = extractions or []
        for i, snippet in enumerate(extractions):
            if not snippet.strip():
                continue  # skip blank/whitespace-only snippets

            needle = snippet.lower()
            pos = lowered.find(needle)
            while pos != -1:
                document.add_extraction(lx.data.Extraction(
                    extraction_class=f"extraction_{i}",
                    extraction_text=snippet,
                    char_interval=[pos, pos + len(snippet)],
                    attributes={
                        "index": i,
                        "original_text": snippet,
                        "tool": "langextract_extract"
                    }
                ))
                # Advance past the whole match: stepping by 1 (the previous
                # behaviour) re-reported overlapping occurrences of the same
                # snippet, producing duplicate highlight annotations.
                pos = lowered.find(needle, pos + len(needle))

        # Default the output location to the system temp directory.
        if not output_path:
            import tempfile
            output_path = os.path.join(
                tempfile.gettempdir(),
                f"langextract_{document_id}.html"
            )

        # Render and persist the interactive HTML document.
        html_content = lx.render.render_doc_as_html(
            document,
            title=f"Agent Analysis - {document_id}"
        )
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html_content)

        if auto_open:
            import webbrowser
            webbrowser.open(f"file://{os.path.abspath(output_path)}")

        return {
            "success": True,
            "html_path": output_path,
            "extractions_count": len(extractions),
            "document_id": document_id,
            "error": None
        }

    except Exception as e:
        # Tool contract: never raise — surface the failure in the result dict.
        return {
            "success": False,
            "error": str(e),
            "html_path": None,
            "extractions_count": 0,
            "document_id": document_id
        }
155+
156+
157+
@tool
@require_approval("File operations require approval for security")
def langextract_render_file(
    file_path: str,
    extractions: Optional[List[str]] = None,
    output_path: Optional[str] = None,
    auto_open: bool = False
) -> Dict[str, Any]:
    """Read a text file and create a langextract visualization.

    Loads the file from disk and delegates to :func:`langextract_extract`
    to produce an interactive HTML document with the given snippets
    highlighted.

    Args:
        file_path: Path of the text file to read.
        extractions: List of text snippets to highlight.
        output_path: Path for the HTML output (defaults to the input's dir).
        auto_open: Whether to automatically open the HTML file in a browser.

    Returns:
        Dict with the same structure as :func:`langextract_extract`; failures
        (missing file, read errors) are reported via ``success``/``error``
        rather than raised.
    """
    import os

    # The file's basename doubles as the document identifier throughout.
    doc_id = os.path.basename(file_path)

    if not os.path.exists(file_path):
        return {
            "success": False,
            "error": f"File not found: {file_path}",
            "html_path": None,
            "extractions_count": 0,
            "document_id": doc_id
        }

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            contents = handle.read()

        # Without an explicit destination, write "<stem>_annotated.html"
        # next to the input file.
        destination = output_path
        if not destination:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            destination = os.path.join(
                os.path.dirname(file_path), f"{stem}_annotated.html"
            )

        # Delegate the actual annotation/rendering work.
        return langextract_extract(
            text=contents,
            extractions=extractions,
            document_id=doc_id,
            output_path=destination,
            auto_open=auto_open
        )

    except Exception as exc:
        return {
            "success": False,
            "error": str(exc),
            "html_path": None,
            "extractions_count": 0,
            "document_id": doc_id
        }
222+
223+
224+
# Export for direct import
225+
__all__ = ["langextract_extract", "langextract_render_file"]

src/praisonai/praisonai/cli/app.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,26 @@
1414

1515

1616
def _setup_langfuse_observability(*, verbose: bool = False) -> None:
17-
"""Set up Langfuse observability by wiring TraceSink to action emitter."""
17+
"""Set up Langfuse observability by wiring both Action and Context emitters."""
1818
try:
1919
from praisonai.observability.langfuse import LangfuseSink
2020
from praisonaiagents.trace.protocol import TraceEmitter, set_default_emitter
21+
from praisonaiagents.trace.context_events import ContextTraceEmitter, set_context_emitter
22+
import atexit
2123

2224
# Create LangfuseSink (auto-reads env vars)
2325
sink = LangfuseSink()
2426

25-
# Set up action-level trace emitter (sufficient for Phase 1)
26-
emitter = TraceEmitter(sink=sink, enabled=True)
27-
set_default_emitter(emitter)
27+
# Set up action-level trace emitter (for backward compatibility)
28+
action_emitter = TraceEmitter(sink=sink, enabled=True)
29+
set_default_emitter(action_emitter)
30+
31+
# Set up context-level trace emitter (captures rich agent lifecycle events)
32+
context_emitter = ContextTraceEmitter(sink=sink.context_sink(), enabled=True)
33+
set_context_emitter(context_emitter)
34+
35+
# Clean up on exit
36+
atexit.register(sink.close)
2837

2938
except ImportError:
3039
# Gracefully degrade if Langfuse not installed

0 commit comments

Comments
 (0)