Skip to content

Commit 6cd7ba3

Browse files
feat: implement langextract observability follow-ups (fixes #1421)
Implements 4 follow-ups after PR #1420 to enhance langextract observability: **Follow-up 1: Richer llm_response content wiring** - Add _extract_llm_response_content() helper in chat_mixin.py - Extract actual message content instead of str(response) for better observability - Improves HTML trace quality by showing actual agent responses **Follow-up 2: LangfuseSink context-emitter bridge** - Add _ContextToActionBridge class for forwarding ContextEvent → ActionEvent - Add context_sink() method to LangfuseSink for ContextTraceSinkProtocol - Update _setup_langfuse_observability to wire both action + context emitters - Enables LangfuseSink to capture rich agent lifecycle events **Follow-up 3: langextract_tools.py tool registration** - Create first-class langextract_extract and langextract_render_file tools - Add to tools/__init__.py TOOL_MAPPINGS for lazy loading - Follows AGENTS.md patterns (decorator-based, lazy imports, optional deps) - Agents can now call langextract functionality directly as tools **Follow-up 4: Documentation updates** - Add comprehensive langextract.mdx in external PraisonAIDocs repo - Cover CLI usage (--observe langextract, render, view), Python API, tools - Created PR: MervinPraison/PraisonAIDocs#162 Architecture: Protocol-driven design per AGENTS.md - core protocols in praisonaiagents, heavy implementations in praisonai wrapper, zero regressions. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-authored-by: MervinPraison <MervinPraison@users.noreply.github.com>
1 parent a6094f5 commit 6cd7ba3

File tree

5 files changed

+348
-6
lines changed

5 files changed

+348
-6
lines changed

src/praisonai-agents/praisonaiagents/agent/chat_mixin.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,33 @@ def _get_display_functions():
5656
class ChatMixin:
5757
"""Mixin providing chat methods for the Agent class."""
5858

59+
def _extract_llm_response_content(self, response) -> Optional[str]:
60+
"""Extract actual message content from LLM response for better observability.
61+
62+
Instead of str(response) which shows the entire ChatCompletion object,
63+
this extracts the actual message text that agents produce.
64+
65+
Args:
66+
response: OpenAI ChatCompletion response object
67+
68+
Returns:
69+
str: The actual message content, or fallback representation
70+
"""
71+
if not response:
72+
return None
73+
74+
try:
75+
# Try to extract the actual message content first
76+
if hasattr(response, 'choices') and response.choices:
77+
choice = response.choices[0]
78+
if hasattr(choice, 'message') and hasattr(choice.message, 'content'):
79+
return choice.message.content
80+
except (AttributeError, IndexError, TypeError):
81+
pass
82+
83+
# Fallback to string representation if extraction fails
84+
return str(response)
85+
5986
def _build_system_prompt(self, tools=None):
6087
"""Build the system prompt with tool information.
6188
@@ -572,7 +599,7 @@ def _chat_completion(self, messages, temperature=1.0, tools=None, stream=True, r
572599
_trace_emitter.llm_response(
573600
self.name,
574601
duration_ms=_duration_ms,
575-
response_content=str(final_response) if final_response else None,
602+
response_content=self._extract_llm_response_content(final_response),
576603
prompt_tokens=_prompt_tokens,
577604
completion_tokens=_completion_tokens,
578605
cost_usd=_cost_usd,

src/praisonai-agents/praisonaiagents/tools/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,10 @@
147147
'Crawl4AITools': ('.crawl4ai_tools', 'Crawl4AITools'),
148148
'crawl4ai_tools': ('.crawl4ai_tools', None),
149149

150+
# Langextract Tools (interactive text analysis)
151+
'langextract_extract': ('.langextract_tools', None),
152+
'langextract_render_file': ('.langextract_tools', None),
153+
150154
# Unified Web Search (auto-fallback across providers)
151155
'search_web': ('.web_search', None),
152156
'web_search': ('.web_search', None), # Alias
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
"""Langextract tools for interactive text analysis and extraction.
2+
3+
Provides first-class tool integration for langextract functionality,
4+
allowing agents to create interactive HTML visualizations from text.
5+
6+
Usage:
7+
from praisonaiagents.tools import langextract_extract
8+
9+
# Agent can call this tool directly
10+
result = langextract_extract(
11+
text="The quick brown fox jumps over the lazy dog.",
12+
extractions=["fox", "dog"]
13+
)
14+
15+
Architecture:
16+
- Follows AGENTS.md tool patterns (decorator-based, lazy imports)
17+
- Protocol-driven design with optional dependencies
18+
- Zero overhead when langextract is not installed
19+
"""
20+
21+
from typing import List, Optional, Dict, Any
22+
from ..approval import require_approval
23+
from .decorator import tool
24+
25+
26+
@tool
def langextract_extract(
    text: str,
    extractions: Optional[List[str]] = None,
    document_id: str = "agent-analysis",
    output_path: Optional[str] = None,
    auto_open: bool = False
) -> Dict[str, Any]:
    """Extract and annotate text using langextract for interactive visualization.

    Creates an interactive HTML document with highlighted extractions that can
    be viewed in a browser. Matching is case-insensitive and non-overlapping
    per snippet. This tool never raises: all failures (empty text, missing
    dependency, render errors) are reported via the returned dict.

    Args:
        text: The source text to analyze and extract from.
        extractions: List of text snippets to highlight in the document.
        document_id: Identifier for the document (used in HTML output).
        output_path: Path to save the HTML file (defaults to a temp file).
        auto_open: Whether to automatically open the HTML file in a browser.

    Returns:
        Dict containing:
        - html_path: Path to the generated HTML file (None on failure)
        - extractions_count: Number of extraction snippets supplied
        - document_id: The document identifier used
        - success: True if successful, False otherwise
        - error: Error message if success is False, else None
    """
    if not text or not text.strip():
        return {
            "success": False,
            "error": "Text cannot be empty",
            "html_path": None,
            "extractions_count": 0,
            "document_id": document_id
        }

    try:
        # Lazy import of the optional dependency — zero overhead when
        # langextract is not installed.
        try:
            import langextract as lx  # type: ignore
        except ImportError:
            return {
                "success": False,
                "error": "langextract is not installed. Install with: pip install langextract",
                "html_path": None,
                "extractions_count": 0,
                "document_id": document_id
            }

        import os

        document = lx.AnnotatedDocument(
            document_id=document_id,
            text=text
        )

        # Lowercase once outside the loop instead of per find() call.
        # NOTE(review): assumes str.lower() preserves string length (true for
        # ASCII; a handful of Unicode code points expand) so offsets in
        # `lowered` map 1:1 onto `text` — confirm if non-ASCII input matters.
        lowered = text.lower()
        extractions = extractions or []
        for i, snippet in enumerate(extractions):
            if not snippet.strip():
                continue  # skip blank/whitespace-only snippets

            needle = snippet.lower()
            pos = lowered.find(needle)
            while pos != -1:
                document.add_extraction(lx.data.Extraction(
                    extraction_class=f"extraction_{i}",
                    extraction_text=snippet,
                    char_interval=[pos, pos + len(snippet)],
                    attributes={
                        "index": i,
                        "original_text": snippet,
                        "tool": "langextract_extract"
                    }
                ))
                # Advance past the whole match: stepping by 1 (the previous
                # behaviour) re-reported overlapping occurrences of the same
                # snippet, producing duplicate highlight annotations.
                pos = lowered.find(needle, pos + len(needle))

        # Default the output location to the system temp directory.
        if not output_path:
            import tempfile
            output_path = os.path.join(
                tempfile.gettempdir(),
                f"langextract_{document_id}.html"
            )

        # Render and persist the interactive HTML document.
        html_content = lx.render.render_doc_as_html(
            document,
            title=f"Agent Analysis - {document_id}"
        )
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html_content)

        if auto_open:
            import webbrowser
            webbrowser.open(f"file://{os.path.abspath(output_path)}")

        return {
            "success": True,
            "html_path": output_path,
            "extractions_count": len(extractions),
            "document_id": document_id,
            "error": None
        }

    except Exception as e:
        # Tool contract: never raise — surface the failure in the result dict.
        return {
            "success": False,
            "error": str(e),
            "html_path": None,
            "extractions_count": 0,
            "document_id": document_id
        }
155+
156+
157+
@tool
@require_approval("File operations require approval for security")
def langextract_render_file(
    file_path: str,
    extractions: Optional[List[str]] = None,
    output_path: Optional[str] = None,
    auto_open: bool = False
) -> Dict[str, Any]:
    """Read a text file and create a langextract visualization.

    Loads the file from disk and delegates to :func:`langextract_extract`
    to produce an interactive HTML document with the given snippets
    highlighted.

    Args:
        file_path: Path of the text file to read.
        extractions: List of text snippets to highlight.
        output_path: Path for the HTML output (defaults to the input's dir).
        auto_open: Whether to automatically open the HTML file in a browser.

    Returns:
        Dict with the same structure as :func:`langextract_extract`; failures
        (missing file, read errors) are reported via ``success``/``error``
        rather than raised.
    """
    import os

    # The file's basename doubles as the document identifier throughout.
    doc_id = os.path.basename(file_path)

    if not os.path.exists(file_path):
        return {
            "success": False,
            "error": f"File not found: {file_path}",
            "html_path": None,
            "extractions_count": 0,
            "document_id": doc_id
        }

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            contents = handle.read()

        # Without an explicit destination, write "<stem>_annotated.html"
        # next to the input file.
        destination = output_path
        if not destination:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            destination = os.path.join(
                os.path.dirname(file_path), f"{stem}_annotated.html"
            )

        # Delegate the actual annotation/rendering work.
        return langextract_extract(
            text=contents,
            extractions=extractions,
            document_id=doc_id,
            output_path=destination,
            auto_open=auto_open
        )

    except Exception as exc:
        return {
            "success": False,
            "error": str(exc),
            "html_path": None,
            "extractions_count": 0,
            "document_id": doc_id
        }
222+
223+
224+
# Export for direct import
225+
__all__ = ["langextract_extract", "langextract_render_file"]

src/praisonai/praisonai/cli/app.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,26 @@
1414

1515

1616
def _setup_langfuse_observability(*, verbose: bool = False) -> None:
17-
"""Set up Langfuse observability by wiring TraceSink to action emitter."""
17+
"""Set up Langfuse observability by wiring both Action and Context emitters."""
1818
try:
1919
from praisonai.observability.langfuse import LangfuseSink
2020
from praisonaiagents.trace.protocol import TraceEmitter, set_default_emitter
21+
from praisonaiagents.trace.context_events import ContextTraceEmitter, set_context_emitter
22+
import atexit
2123

2224
# Create LangfuseSink (auto-reads env vars)
2325
sink = LangfuseSink()
2426

25-
# Set up action-level trace emitter (sufficient for Phase 1)
26-
emitter = TraceEmitter(sink=sink, enabled=True)
27-
set_default_emitter(emitter)
27+
# Set up action-level trace emitter (for backward compatibility)
28+
action_emitter = TraceEmitter(sink=sink, enabled=True)
29+
set_default_emitter(action_emitter)
30+
31+
# Set up context-level trace emitter (captures rich agent lifecycle events)
32+
context_emitter = ContextTraceEmitter(sink=sink.context_sink(), enabled=True)
33+
set_context_emitter(context_emitter)
34+
35+
# Clean up on exit
36+
atexit.register(sink.close)
2837

2938
except ImportError:
3039
# Gracefully degrade if Langfuse not installed

0 commit comments

Comments
 (0)