Merge pull request Shubhamsaboo#131 from Madhuvod/legal-changess

Shubhamsaboo · web-flow · commit 81b75cf72c1d · 2025-02-25T15:33:16.000-06:00
Fixed issues in AI Legal Agent
diff --git a/ai_agent_tutorials/ai_legal_agent_team/legal_agent_team.py b/ai_agent_tutorials/ai_legal_agent_team/legal_agent_team.py
@@ -7,8 +7,8 @@
 from agno.embedder.openai import OpenAIEmbedder
 import tempfile
 import os
+from agno.document.chunking.document import DocumentChunking
 
-#initializing the session state variables
 def init_session_state():
     """Initialize session state variables"""
     if 'openai_api_key' not in st.session_state:
@@ -23,55 +23,87 @@ def init_session_state():
         st.session_state.legal_team = None
     if 'knowledge_base' not in st.session_state:
         st.session_state.knowledge_base = None
+    # Add a new state variable to track processed files
+    if 'processed_files' not in st.session_state:
+        st.session_state.processed_files = set()
+
+COLLECTION_NAME = "legal_documents"  # Define your collection name
 
 def init_qdrant():
-    """Initialize Qdrant vector database"""
-    if not st.session_state.qdrant_api_key:
-        raise ValueError("Qdrant API key not provided")
-    if not st.session_state.qdrant_url:
-        raise ValueError("Qdrant URL not provided")
-        
-    return Qdrant(          
-        collection="legal_knowledge",
-        url=st.session_state.qdrant_url,
-        api_key=st.session_state.qdrant_api_key,
-        https=True,
-        timeout=None,
-        distance="cosine"
-    )
+    """Build Agno's Qdrant vector DB from session settings; return None if unconfigured or on error."""
+    if not all([st.session_state.qdrant_api_key, st.session_state.qdrant_url]):
+        return None
+    try:
+        # Create Agno's Qdrant instance which implements VectorDb
+        vector_db = Qdrant(
+            collection=COLLECTION_NAME,
+            url=st.session_state.qdrant_url,
+            api_key=st.session_state.qdrant_api_key,
+            embedder=OpenAIEmbedder(
+                id="text-embedding-3-small", 
+                api_key=st.session_state.openai_api_key
+            )
+        )
+        return vector_db
+    except Exception as e:
+        st.error(f"🔴 Qdrant connection failed: {str(e)}")
+        return None
 
 def process_document(uploaded_file, vector_db: Qdrant):
-    """Process document, create embeddings and store in Qdrant vector database"""
+    """
+    Process document, create embeddings and store in Qdrant vector database
+    
+    Args:
+        uploaded_file: Streamlit uploaded file object
+        vector_db (Qdrant): Initialized Qdrant instance from Agno
+    
+    Returns:
+        PDFKnowledgeBase: Initialized knowledge base with processed documents
+    """
     if not st.session_state.openai_api_key:
         raise ValueError("OpenAI API key not provided")
         
     os.environ['OPENAI_API_KEY'] = st.session_state.openai_api_key
     
-    with tempfile.TemporaryDirectory() as temp_dir:
-      
-        temp_file_path = os.path.join(temp_dir, uploaded_file.name)
-        with open(temp_file_path, "wb") as f:
-            f.write(uploaded_file.getbuffer())
-
-        try:
-       
-            embedder = OpenAIEmbedder(
-                model="text-embedding-3-small",
-                api_key=st.session_state.openai_api_key
+    try:
+        # Save the uploaded file to a temporary location
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
+            temp_file.write(uploaded_file.getvalue())
+            temp_file_path = temp_file.name
+        
+        st.info("Loading and processing document...")
+        
+        # Create a PDFKnowledgeBase with the vector_db
+        knowledge_base = PDFKnowledgeBase(
+            path=temp_file_path,  # Single string path, not a list
+            vector_db=vector_db,
+            reader=PDFReader(),
+            chunking_strategy=DocumentChunking(
+                chunk_size=1000,
+                overlap=200
             )
+        )
+        
+        # Load the documents into the knowledge base
+        with st.spinner('📤 Loading documents into knowledge base...'):
+            try:
+                knowledge_base.load(recreate=True, upsert=True)
+                st.success("✅ Documents stored successfully!")
+            except Exception as e:
+                st.error(f"Error loading documents: {str(e)}")
+                raise
+        
+        # Clean up the temporary file
+        try:
+            os.unlink(temp_file_path)
+        except Exception:
+            pass
             
-            # Creating knowledge base with explicit Qdrant configuration
-            knowledge_base = PDFKnowledgeBase(
-                path=temp_dir, 
-                vector_db=vector_db, 
-                reader=PDFReader(chunk=True),
-                embedder=embedder,
-                recreate_vector_db=True  
-            )
-            knowledge_base.load()     
-            return knowledge_base      
-        except Exception as e:
-            raise Exception(f"Error processing document: {str(e)}")
+        return knowledge_base
+            
+    except Exception as e:
+        st.error(f"Document processing error: {str(e)}")
+        raise Exception(f"Error processing document: {str(e)}")
 
 def main():
     st.set_page_config(page_title="Legal Document Analyzer", layout="wide")
@@ -102,7 +134,7 @@ def main():
 
         qdrant_url = st.text_input(
             "Qdrant URL",
-            value=st.session_state.qdrant_url if st.session_state.qdrant_url else "https://f499085c-b4bf-4bda-a9a5-227f62a9ca20.us-west-2-0.aws.cloud.qdrant.io:6333",
+            value=st.session_state.qdrant_url if st.session_state.qdrant_url else "",
             help="Enter your Qdrant instance URL"
         )
         if qdrant_url:
@@ -111,8 +143,10 @@ def main():
         if all([st.session_state.qdrant_api_key, st.session_state.qdrant_url]):
             try:
                 if not st.session_state.vector_db:
+                    # init_qdrant() returns Agno's Qdrant vector DB wrapper, or None if setup fails
                     st.session_state.vector_db = init_qdrant()
-                    st.success("Successfully connected to Qdrant!")
+                    if st.session_state.vector_db:
+                        st.success("Successfully connected to Qdrant!")
             except Exception as e:
                 st.error(f"Failed to connect to Qdrant: {str(e)}")
 
@@ -123,80 +157,90 @@ def main():
             uploaded_file = st.file_uploader("Upload Legal Document", type=['pdf'])
             
             if uploaded_file:
-                with st.spinner("Processing document..."):
-                    try:
-                        knowledge_base = process_document(uploaded_file, st.session_state.vector_db)
-                        st.session_state.knowledge_base = knowledge_base
-                        
-                        # Initialize agents
-                        legal_researcher = Agent(
-                            name="Legal Researcher",
-                            role="Legal research specialist",
-                            model=OpenAIChat(model="gpt-4o"),
-                            tools=[DuckDuckGoTools()],
-                            knowledge=st.session_state.knowledge_base,
-                            search_knowledge=True,
-                            instructions=[
-                                "Find and cite relevant legal cases and precedents",
-                                "Provide detailed research summaries with sources",
-                                "Reference specific sections from the uploaded document",
-                                "Always search the knowledge base for relevant information"
-                            ],
-                            show_tool_calls=True,
-                            markdown=True
-                        )
+                # Check if this file has already been processed
+                if uploaded_file.name not in st.session_state.processed_files:
+                    with st.spinner("Processing document..."):
+                        try:
+                            # Process the document and get the knowledge base
+                            knowledge_base = process_document(uploaded_file, st.session_state.vector_db)
+                            
+                            if knowledge_base:
+                                st.session_state.knowledge_base = knowledge_base
+                                # Add the file to processed files
+                                st.session_state.processed_files.add(uploaded_file.name)
+                                
+                                # Initialize agents
+                                legal_researcher = Agent(
+                                    name="Legal Researcher",
+                                    role="Legal research specialist",
+                                    model=OpenAIChat(id="gpt-4o"),
+                                    tools=[DuckDuckGoTools()],
+                                    knowledge=st.session_state.knowledge_base,
+                                    search_knowledge=True,
+                                    instructions=[
+                                        "Find and cite relevant legal cases and precedents",
+                                        "Provide detailed research summaries with sources",
+                                        "Reference specific sections from the uploaded document",
+                                        "Always search the knowledge base for relevant information"
+                                    ],
+                                    show_tool_calls=True,
+                                    markdown=True
+                                )
 
-                        contract_analyst = Agent(
-                            name="Contract Analyst",
-                            role="Contract analysis specialist",
-                            model=OpenAIChat(model="gpt-4o"),
-                            knowledge=knowledge_base,
-                            search_knowledge=True,
-                            instructions=[
-                                "Review contracts thoroughly",
-                                "Identify key terms and potential issues",
-                                "Reference specific clauses from the document"
-                            ],
-                            markdown=True
-                        )
+                                contract_analyst = Agent(
+                                    name="Contract Analyst",
+                                    role="Contract analysis specialist",
+                                    model=OpenAIChat(id="gpt-4o"),
+                                    knowledge=st.session_state.knowledge_base,
+                                    search_knowledge=True,
+                                    instructions=[
+                                        "Review contracts thoroughly",
+                                        "Identify key terms and potential issues",
+                                        "Reference specific clauses from the document"
+                                    ],
+                                    markdown=True
+                                )
 
-                        legal_strategist = Agent(
-                            name="Legal Strategist", 
-                            role="Legal strategy specialist",
-                            model=OpenAIChat(model="gpt-4o"),
-                            knowledge=knowledge_base,
-                            search_knowledge=True,
-                            instructions=[
-                                "Develop comprehensive legal strategies",
-                                "Provide actionable recommendations",
-                                "Consider both risks and opportunities"
-                            ],
-                            markdown=True
-                        )
+                                legal_strategist = Agent(
+                                    name="Legal Strategist", 
+                                    role="Legal strategy specialist",
+                                    model=OpenAIChat(id="gpt-4o"),
+                                    knowledge=st.session_state.knowledge_base,
+                                    search_knowledge=True,
+                                    instructions=[
+                                        "Develop comprehensive legal strategies",
+                                        "Provide actionable recommendations",
+                                        "Consider both risks and opportunities"
+                                    ],
+                                    markdown=True
+                                )
 
-                        # Legal Agent Team
-                        st.session_state.legal_team = Agent(
-                            name="Legal Team Lead",
-                            role="Legal team coordinator",
-                            model=OpenAIChat(model="gpt-4o"),
-                            team=[legal_researcher, contract_analyst, legal_strategist],
-                            knowledge=st.session_state.knowledge_base,
-                            search_knowledge=True,
-                            instructions=[
-                                "Coordinate analysis between team members",
-                                "Provide comprehensive responses",
-                                "Ensure all recommendations are properly sourced",
-                                "Reference specific parts of the uploaded document",
-                                "Always search the knowledge base before delegating tasks"
-                            ],
-                            show_tool_calls=True,
-                            markdown=True
-                        )
-                        
-                        st.success("✅ Document processed and team initialized!")
-                            
-                    except Exception as e:
-                        st.error(f"Error processing document: {str(e)}")
+                                # Legal Agent Team
+                                st.session_state.legal_team = Agent(
+                                    name="Legal Team Lead",
+                                    role="Legal team coordinator",
+                                    model=OpenAIChat(id="gpt-4o"),
+                                    team=[legal_researcher, contract_analyst, legal_strategist],
+                                    knowledge=st.session_state.knowledge_base,
+                                    search_knowledge=True,
+                                    instructions=[
+                                        "Coordinate analysis between team members",
+                                        "Provide comprehensive responses",
+                                        "Ensure all recommendations are properly sourced",
+                                        "Reference specific parts of the uploaded document",
+                                        "Always search the knowledge base before delegating tasks"
+                                    ],
+                                    show_tool_calls=True,
+                                    markdown=True
+                                )
+                                
+                                st.success("✅ Document processed and team initialized!")
+                                
+                        except Exception as e:
+                            st.error(f"Error processing document: {str(e)}")
+                else:
+                    # File already processed, just show a message
+                    st.success("✅ Document already processed and team ready!")
 
             st.divider()
             st.header("🔍 Analysis Options")