Skip to content

Commit 44e4d82

Browse files
feat: implement langextract tools for interactive text visualization (fixes #24)
- Add langextract_extract and langextract_render_file tools
- Support for highlighting text extractions in interactive HTML
- Lazy imports with graceful degradation when langextract not installed
- Security warnings for file operations
- Comprehensive test suite with mocking
- Usage example with agent integration patterns
- Optional dependency langextract>=0.1.0 in pyproject.toml

Co-authored-by: Mervin Praison <MervinPraison@users.noreply.github.com>
1 parent 3c698b4 commit 44e4d82

5 files changed

Lines changed: 856 additions & 0 deletions

File tree

examples/langextract_example.py

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
"""
2+
Langextract Tool Example
3+
4+
This example demonstrates how to use the langextract tools for
5+
interactive text visualization and analysis with PraisonAI agents.
6+
7+
Prerequisites:
8+
pip install praisonai-tools[langextract]
9+
# or
10+
pip install praisonai-tools langextract
11+
12+
Features demonstrated:
13+
- Text analysis with highlighted extractions
14+
- File-based document analysis
15+
- Integration with PraisonAI agents
16+
- Interactive HTML generation
17+
"""
18+
19+
import os
20+
import tempfile
21+
from pathlib import Path
22+
23+
from praisonai_tools import langextract_extract, langextract_render_file
24+
25+
26+
def basic_text_analysis():
    """Run ``langextract_extract`` on a sample contract and print the outcome.

    A hard-coded consulting agreement and a list of key terms are passed to
    ``langextract_extract`` together with ``document_id``,
    ``output_path="contract_analysis.html"`` and ``auto_open=False``.

    When the returned mapping has a truthy ``"success"`` entry, a summary
    built from its ``document_id``, ``output_path``, ``extractions_count``
    and ``text_length`` entries is printed.  Otherwise the ``"error"`` entry
    is printed, followed by an install hint when that error mentions that
    langextract is not installed.
    """
    print("🔍 Basic Text Analysis with Langextract")
    print("=" * 50)

    # Sample contract text (leading and trailing newline are part of the text)
    contract_text = (
        "\n"
        "CONSULTING AGREEMENT\n"
        "\n"
        "This Agreement is entered into on March 15, 2024, between TechCorp Inc.\n"
        '(the "Client") and Jane Smith (the "Consultant"). The Consultant will\n'
        "provide AI development services for a period of 6 months starting\n"
        "April 1, 2024.\n"
        "\n"
        "Payment terms: $5,000 per month, payable within 15 days of invoice date.\n"
        "Confidentiality obligations remain in effect for 2 years after termination.\n"
    )

    # Key terms to highlight
    key_terms = [
        "TechCorp Inc.",
        "Jane Smith",
        "March 15, 2024",
        "April 1, 2024",
        "$5,000 per month",
        "15 days",
        "2 years",
        "AI development services",
    ]

    # Extract and visualize
    result = langextract_extract(
        text=contract_text,
        extractions=key_terms,
        document_id="consulting-agreement",
        output_path="contract_analysis.html",
        auto_open=False,  # Set to True to open in browser automatically
    )

    if result.get("success"):
        print("✅ Analysis complete!")
        print(f" Document ID: {result['document_id']}")
        print(f" Output file: {result['output_path']}")
        print(f" Extractions: {result['extractions_count']} terms highlighted")
        print(f" Text length: {result['text_length']} characters")
        print()
        print(f"💡 Open {result['output_path']} in your browser to view the interactive visualization!")
    else:
        error = result.get("error")
        print(f"❌ Error: {error}")
        # The "error" entry may be missing or None; normalise it to a string
        # so the substring test below cannot raise TypeError.
        if "langextract not installed" in str(error or ""):
            print("💡 Install with: pip install langextract")
77+
78+
79+
def file_analysis_example():
    """Write a sample spec to a temporary file and run ``langextract_render_file`` on it.

    The sample document is written to a ``.txt`` temporary file, whose path
    is passed to ``langextract_render_file`` together with the terms to
    highlight, ``output_path="tech_spec_analysis.html"`` and
    ``auto_open=False``.  A summary is printed when the returned mapping has
    a truthy ``"success"`` entry; otherwise its ``"error"`` entry is printed.

    The temporary file is removed afterwards on a best-effort basis, whether
    or not the write or the analysis succeeded.
    """
    print("📄 File-based Document Analysis")
    print("=" * 50)

    # Sample document (leading and trailing newline are part of the text)
    document_content = (
        "\n"
        "TECHNICAL SPECIFICATION DOCUMENT\n"
        "\n"
        "Project: AI-Powered Analytics Dashboard\n"
        "Version: 2.1.0\n"
        "Date: April 17, 2026\n"
        "\n"
        "REQUIREMENTS:\n"
        "- Python 3.10+ runtime environment\n"
        "- PostgreSQL 14+ database\n"
        "- Redis for caching layer\n"
        "- Docker for containerization\n"
        "- Kubernetes for orchestration\n"
        "\n"
        "SECURITY CONSIDERATIONS:\n"
        "- JWT authentication with 24-hour expiry\n"
        "- Rate limiting: 1000 requests per hour per API key\n"
        "- HTTPS-only communication\n"
        "- Data encryption at rest using AES-256\n"
        "\n"
        "PERFORMANCE TARGETS:\n"
        "- API response time: < 200ms for 95th percentile\n"
        "- Database query optimization required\n"
        "- Concurrent user support: 10,000+ users\n"
    )

    # Technical terms to highlight
    tech_terms = [
        "Python 3.10+",
        "PostgreSQL 14+",
        "Redis",
        "Docker",
        "Kubernetes",
        "JWT authentication",
        "24-hour expiry",
        "1000 requests per hour",
        "AES-256",
        "< 200ms",
        "10,000+ users",
    ]

    # delete=False keeps the file on disk after it is closed so that it can be
    # handed over by path; an explicit encoding avoids depending on the
    # platform default.
    temp_file = tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", delete=False, suffix=".txt"
    )
    temp_file_path = temp_file.name

    # The try starts before the write so that a failed write still reaches
    # the cleanup in the finally clause.
    try:
        with temp_file:
            temp_file.write(document_content)

        # Analyze the file
        result = langextract_render_file(
            file_path=temp_file_path,
            extractions=tech_terms,
            output_path="tech_spec_analysis.html",
            auto_open=False,
        )

        if result.get("success"):
            print("✅ File analysis complete!")
            print(f" Source file: {temp_file_path}")
            print(f" Document ID: {result['document_id']}")
            print(f" Output file: {result['output_path']}")
            print(f" Extractions: {result['extractions_count']} terms highlighted")
            print(f" Text length: {result['text_length']} characters")
            print()
            print(f"💡 Open {result['output_path']} in your browser to view the interactive visualization!")
        else:
            print(f"❌ Error: {result.get('error')}")
    finally:
        # Best-effort cleanup: a file that cannot be removed must not mask
        # the outcome of the example itself.
        try:
            os.unlink(temp_file_path)
        except OSError:
            pass
158+
159+
160+
def agent_integration_example():
    """Print a sample snippet showing how the tools can be given to an agent.

    Nothing is executed here: the integration code lives in a string literal
    and is only printed, followed by a short list of integration points.
    """
    print("🤖 PraisonAI Agent Integration")
    print("=" * 50)

    # This is how you would integrate with agents.  The snippet is kept at
    # column 0 inside the literal and indented conventionally so that the
    # printed text is valid, copy-pasteable Python.
    agent_code = '''
from praisonaiagents import Agent
from praisonai_tools import langextract_extract, langextract_render_file

# Create an agent that analyzes documents
document_analyzer = Agent(
    name="DocumentAnalyzer",
    instructions="""
    You are a document analysis expert. When given text or documents,
    identify key entities, dates, financial figures, and important terms.
    Use langextract tools to create interactive visualizations highlighting
    your findings.
    """,
    tools=[langextract_extract, langextract_render_file]
)

# Agent workflow example
def analyze_document(text_or_file_path):
    """Agent analyzes document and creates visualization."""

    if text_or_file_path.endswith('.txt'):
        # File-based analysis
        response = document_analyzer.start(
            f"Analyze this document file and highlight key terms: {text_or_file_path}"
        )
    else:
        # Text-based analysis
        response = document_analyzer.start(
            f"Analyze this text and highlight important information: {text_or_file_path}"
        )

    return response

# Example usage:
# result = analyze_document("contract.txt")
# result = analyze_document("The quarterly report shows revenue of $1.2M...")
'''

    print("💻 Example agent integration code:")
    print(agent_code)
    print()
    print("🔧 Key integration points:")
    print(" - Import tools: langextract_extract, langextract_render_file")
    print(" - Add to agent tools list")
    print(" - Agent can automatically use tools based on instructions")
    print(" - Interactive HTML files created for human review")
212+
213+
214+
def error_handling_example():
    """Show how a caller can react to an error reported by ``langextract_extract``.

    A small extraction is requested and the returned mapping is checked for
    an ``"error"`` key; one of two messages is printed accordingly, followed
    by a fixed list of error scenarios.
    """
    heading = "⚠️ Error Handling and Graceful Degradation"
    print(heading)
    print("=" * 50)

    # Nothing is uninstalled or simulated here: the call below simply
    # reports whatever the current environment produces.
    print("Testing graceful degradation when langextract is not installed:")

    outcome = langextract_extract(
        text="Sample text for analysis",
        extractions=["Sample"],
        document_id="test",
    )

    if "error" not in outcome:
        print("✅ Langextract is available and working!")
    else:
        print(f"✅ Graceful error handling: {outcome['error']}")
        print("💡 Users get clear installation instructions")

    print()
    print("Common error scenarios handled:")
    handled_scenarios = (
        "langextract not installed → Clear installation message",
        "Invalid file path → File not found error",
        "Empty text input → Parameter validation",
        "Browser auto-open fails → Graceful fallback",
    )
    for scenario in handled_scenarios:
        print(f" - ❌ {scenario}")
240+
241+
242+
def main():
    """Run every example in order, then print suggested next steps.

    Any exception raised by an example is caught, reported on stdout, and
    ends the run without printing the closing summary.
    """
    print("🚀 Langextract Tool Examples for PraisonAI")
    print("=" * 70)
    print()

    try:
        demos = (
            basic_text_analysis,
            file_analysis_example,
            agent_integration_example,
            error_handling_example,
        )
        # Each example is followed by one blank separator line.
        for run_demo in demos:
            run_demo()
            print()

        print("🎉 All examples completed!")
        print()
        print("📚 Next Steps:")
        next_steps = (
            " 1. Install langextract: pip install langextract",
            " 2. Run your own text analysis",
            " 3. Integrate with your PraisonAI agents",
            " 4. Open generated HTML files to see interactive visualizations",
        )
        for step in next_steps:
            print(step)
    except Exception as e:
        print(f"❌ Example error: {e}")
        print("💡 This is expected if langextract is not installed")


if __name__ == "__main__":
    main()

praisonai_tools/tools/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,10 @@ def __getattr__(name):
394394
# LumaLabs
395395
"LumaLabsTool": "lumalabs_tool",
396396
"lumalabs_generate": "lumalabs_tool",
397+
# Langextract
398+
"LangExtractTool": "langextract_tool",
399+
"langextract_extract": "langextract_tool",
400+
"langextract_render_file": "langextract_tool",
397401
# HeyGen
398402
"HeyGenTool": "heygen_tool",
399403
"heygen_list_avatars": "heygen_tool",
@@ -874,4 +878,8 @@ def __getattr__(name):
874878
"N8nWorkflowTool",
875879
"n8n_workflow",
876880
"n8n_list_workflows",
881+
# Langextract Tool
882+
"LangExtractTool",
883+
"langextract_extract",
884+
"langextract_render_file",
877885
]

0 commit comments

Comments
 (0)