Commit ab75086

feat: Improve RAG agent flow - Use LLM directly for general knowledge queries - Update store path to embeddings/ - Add detailed logging - Update documentation
1 parent ac5fdea commit ab75086

4 files changed: +112 -30 lines changed


agentic_rag/README.md

Lines changed: 26 additions & 12 deletions
@@ -73,47 +73,53 @@ python pdf_processor.py --input path/to/pdf/directory --output chunks.json
 
 # Process a single PDF from a URL
 python pdf_processor.py --input https://example.com/document.pdf --output chunks.json
+# sample pdf: https://arxiv.org/pdf/2203.06605
 ```
 
 #### Manage Vector Store
 
 Add documents to the vector store and query them:
+
 ```bash
 # Add documents from a chunks file
-python store.py --add chunks.json --store-path my_chroma_db
+python store.py --add chunks.json
 
-# Query the vector store
-python store.py --query "your search query" --store-path my_chroma_db
+# Query the vector store directly, or with local_rag_agent.py
+python store.py --query "your search query"
+python local_rag_agent.py --query "your search query"
 ```
 
 #### Use RAG Agent
-Query documents using either the OpenAI or local model:
+
+Query documents using either OpenAI or a local model:
+
 ```bash
 # Using OpenAI (requires API key in .env)
-python rag_agent.py --query "What are the main topics?" --store-path my_chroma_db
+python rag_agent.py --query "Can you explain the DaGAN Approach proposed in the Depth-Aware Generative Adversarial Network for Talking Head Video Generation article?"
 
 # Using local Mistral model
-python local_rag_agent.py --query "What are the main topics?" --store-path my_chroma_db
+python local_rag_agent.py --query "Can you explain the DaGAN Approach proposed in the Depth-Aware Generative Adversarial Network for Talking Head Video Generation article?"
 ```
 
 ### 3. Complete Pipeline Example
 
 Here's how to process a document and query it using the local model:
+
 ```bash
 # 1. Process the PDF
 python pdf_processor.py --input example.pdf --output chunks.json
 
 # 2. Add to vector store
-python store.py --add chunks.json --store-path my_chroma_db
+python store.py --add chunks.json
 
 # 3. Query using local model
-python local_rag_agent.py --query "What is the main conclusion?" --store-path my_chroma_db
+python local_rag_agent.py --query "Can you explain the DaGAN Approach proposed in the Depth-Aware Generative Adversarial Network for Talking Head Video Generation article?"
 ```
 
 Or using OpenAI (requires API key):
 ```bash
 # Same steps 1 and 2 as above, then:
-python rag_agent.py --query "What is the main conclusion?" --store-path my_chroma_db
+python rag_agent.py --query "Can you explain the DaGAN Approach proposed in the Depth-Aware Generative Adversarial Network for Talking Head Video Generation article?"
 ```
 
 ## API Endpoints
@@ -153,11 +159,19 @@ The system consists of several key components:
 - Local Agent: Uses `Mistral-7B` as an open-source alternative
 4. **FastAPI Server**: Provides REST API endpoints for document upload and querying
 
+The RAG Agent flow is the following:
+
+1. Analyze the query type
+2. Try to find relevant PDF context, regardless of query type
+3. If PDF context is found, use it to generate a response
+4. If no PDF context is found, or if it is a general knowledge query, use the pre-trained LLM directly
+5. Fall back to a "no information" response only in edge cases
+
 ## Hardware Requirements
 
-- For OpenAI Agent: Standard CPU machine
-- For Local Agent:
-  - Minimum 16GB RAM, recommended more than 24GBs
+- For the OpenAI Agent: Standard CPU machine
+- For the Local Agent:
+  - Minimum 16GB RAM (recommended >24 GB)
   - GPU with 8GB VRAM recommended for better performance
   - Will run on CPU if GPU is not available, but will be significantly slower.

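For reference, the flow described in the updated README corresponds to roughly the following control flow (a condensed sketch; `answer` is a hypothetical helper, but the method and attribute names are taken from this commit's local_rag_agent.py, shown in the diff below):

```python
from typing import Any, Dict

def answer(agent, query: str) -> Dict[str, Any]:
    """Condensed sketch of LocalRAGAgent.process_query from this commit."""
    analysis = agent._analyze_query(query)                     # 1. analyze the query type
    if analysis.query_type == "unsupported":
        return {"answer": "I don't have the information to answer this query.",
                "reasoning": analysis.reasoning, "context": []}
    context = agent.vector_store.query_pdf_collection(query)   # 2. always try PDF context first
    if context:
        return agent._generate_response(query, context)        # 3. ground the answer in the PDFs
    return agent._generate_direct_response(query)              # 4./5. fall back to the bare LLM
```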
agentic_rag/local_rag_agent.py

Lines changed: 83 additions & 15 deletions
@@ -6,6 +6,15 @@
 import argparse
 import yaml
 import os
+import logging
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    datefmt='%H:%M:%S'
+)
+logger = logging.getLogger(__name__)
 
 class QueryAnalysis(BaseModel):
     """Pydantic model for query analysis output"""
@@ -107,42 +116,89 @@ def _analyze_query(self, query: str) -> QueryAnalysis:
             requires_context=True
         )
 
+    def _generate_direct_response(self, query: str) -> Dict[str, Any]:
+        """Generate a response directly from the LLM without context"""
+        logger.info("Generating direct response from LLM without context...")
+
+        prompt = f"""You are a helpful AI assistant. Please answer the following query to the best of your ability.
+If you're not confident about the answer, please say so.
+
+Query: {query}
+
+Answer:"""
+
+        logger.info("Generating response using local model...")
+        response = self._generate_text(prompt, max_length=1024)
+        logger.info("Response generation complete")
+
+        return {
+            "answer": response,
+            "context": []
+        }
+
     def process_query(self, query: str) -> Dict[str, Any]:
         """Process a user query using the agentic RAG pipeline"""
+        logger.info(f"Starting to process query: {query}")
+
         # Analyze the query
+        logger.info("Analyzing query type and context requirements...")
         analysis = self._analyze_query(query)
+        logger.info(f"Query analysis results:")
+        logger.info(f"- Type: {analysis.query_type}")
+        logger.info(f"- Requires context: {analysis.requires_context}")
+        logger.info(f"- Reasoning: {analysis.reasoning}")
 
         # If query type is unsupported, return early
         if analysis.query_type == "unsupported":
+            logger.warning("Query type is unsupported")
             return {
                 "answer": "I apologize, but I don't have the information to answer this query.",
                 "reasoning": analysis.reasoning,
                 "context": []
             }
 
-        # Retrieve relevant context based on query type
-        if analysis.query_type == "pdf_documents":
-            context = self.vector_store.query_pdf_collection(query)
-        else:
-            context = self.vector_store.query_general_collection(query)
+        # First try to get context from PDF documents
+        logger.info("Querying PDF collection...")
+        context = self.vector_store.query_pdf_collection(query)
+        logger.info(f"Retrieved {len(context)} context chunks")
 
-        # Generate response using context
-        if context and analysis.requires_context:
+        if context:
+            # If we found relevant PDF context, use it
+            for i, ctx in enumerate(context):
+                source = ctx["metadata"].get("source", "Unknown")
+                pages = ctx["metadata"].get("page_numbers", [])
+                logger.info(f"Context chunk {i+1}:")
+                logger.info(f"- Source: {source}")
+                logger.info(f"- Pages: {pages}")
+                logger.info(f"- Content preview: {ctx['content'][:100]}...")
+
+            logger.info("Generating response with PDF context...")
             response = self._generate_response(query, context)
-        else:
-            response = {
-                "answer": "I couldn't find relevant information to answer your query.",
-                "reasoning": analysis.reasoning,
-                "context": []
-            }
+            logger.info("Response generated successfully")
+            return response
+
+        # If no PDF context found or if it's a general knowledge query,
+        # use the LLM directly
+        if analysis.query_type == "general_knowledge" or not context:
+            logger.info("No relevant PDF context found or general knowledge query detected")
+            logger.info("Falling back to direct LLM response...")
+            return self._generate_direct_response(query)
 
-        return response
+        # This case should rarely happen, but just in case
+        logger.warning("No relevant context found and query type is not general knowledge")
+        return {
+            "answer": "I couldn't find relevant information to answer your query.",
+            "reasoning": analysis.reasoning,
+            "context": []
+        }
 
     def _generate_response(self, query: str, context: List[Dict[str, Any]]) -> Dict[str, Any]:
         """Generate a response using the retrieved context"""
+        logger.info("Preparing context for response generation...")
         context_str = "\n\n".join([f"Context {i+1}:\n{item['content']}"
                                    for i, item in enumerate(context)])
 
+        logger.info("Building prompt with context...")
         prompt = f"""Answer the following query using the provided context.
 If the context doesn't contain enough information to answer accurately,
 say so explicitly.
@@ -154,7 +210,9 @@ def _generate_response(self, query: str, context: List[Dict[str, Any]]) -> Dict[
 
 Answer:"""
 
+        logger.info("Generating response using local model...")
         response = self._generate_text(prompt, max_length=1024)
+        logger.info("Response generation complete")
 
         return {
             "answer": response,
@@ -164,16 +222,25 @@ def _generate_response(self, query: str, context: List[Dict[str, Any]]) -> Dict[
 def main():
     parser = argparse.ArgumentParser(description="Query documents using local Mistral model")
     parser.add_argument("--query", required=True, help="Query to process")
-    parser.add_argument("--store-path", default="chroma_db", help="Path to the vector store")
+    parser.add_argument("--store-path", default="embeddings", help="Path to the vector store")
     parser.add_argument("--model", default="mistralai/Mistral-7B-Instruct-v0.2", help="Model to use")
+    parser.add_argument("--quiet", action="store_true", help="Disable verbose logging")
 
     args = parser.parse_args()
 
+    # Set logging level based on quiet flag
+    if args.quiet:
+        logger.setLevel(logging.WARNING)
+    else:
+        logger.setLevel(logging.INFO)
+
     print("\nInitializing RAG agent...")
     print("=" * 50)
 
     try:
+        logger.info(f"Initializing vector store from: {args.store_path}")
         store = VectorStore(persist_directory=args.store_path)
+        logger.info("Initializing local RAG agent...")
         agent = LocalRAGAgent(store, model_name=args.model)
 
         print(f"\nProcessing query: {args.query}")
@@ -193,6 +260,7 @@ def main():
             print(f"- {source} (pages: {pages})")
 
     except Exception as e:
+        logger.error(f"Error during execution: {str(e)}", exc_info=True)
         print(f"\n✗ Error: {str(e)}")
         exit(1)
 

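A note on the new `--quiet` flag: it only raises the module logger's level, which is enough to silence the INFO records added throughout this file while keeping warnings and errors. A minimal, self-contained sketch of the mechanism (names mirror this commit):

```python
import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%H:%M:%S'
)
logger = logging.getLogger(__name__)

logger.info("visible by default")     # emitted at INFO
logger.setLevel(logging.WARNING)      # what --quiet does
logger.info("suppressed when quiet")  # filtered by the logger's own level
logger.warning("still visible")       # warnings and errors still get through
```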
agentic_rag/main.py

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@
 pdf_processor = PDFProcessor()
 vector_store = VectorStore()
 
-# Initialize RAG agent - use OpenAI if API key is available, otherwise use local model
+# Initialize RAG agent - use OpenAI if API key is available, otherwise fall back to the local model (the default)
 openai_api_key = os.getenv("OPENAI_API_KEY")
 if openai_api_key:
     print("\nUsing OpenAI GPT-4 for RAG...")

agentic_rag/store.py

Lines changed: 2 additions & 2 deletions
@@ -5,7 +5,7 @@
 from chromadb.config import Settings
 
 class VectorStore:
-    def __init__(self, persist_directory: str = "chroma_db"):
+    def __init__(self, persist_directory: str = "embeddings"):
         """Initialize vector store with ChromaDB"""
         self.client = chromadb.PersistentClient(
             path=persist_directory,
@@ -113,7 +113,7 @@ def main():
     parser = argparse.ArgumentParser(description="Manage vector store")
     parser.add_argument("--add", help="JSON file containing chunks to add")
    parser.add_argument("--query", help="Query to search for")
-    parser.add_argument("--store-path", default="chroma_db", help="Path to vector store")
+    parser.add_argument("--store-path", default="embeddings", help="Path to vector store")
 
     args = parser.parse_args()
     store = VectorStore(persist_directory=args.store_path)

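With both entry points now defaulting to the same persist directory, the store can also be exercised on its own. A quick usage sketch (assuming store.py is importable from agentic_rag/; query_pdf_collection is the retrieval method the agent calls in this commit):

```python
from store import VectorStore

# With no argument, data now persists under embeddings/ rather than chroma_db/
store = VectorStore()

results = store.query_pdf_collection("your search query")
for r in results:
    # Each result carries the chunk text plus source metadata, as used by the agent's logging
    print(r["metadata"].get("source", "Unknown"), "-", r["content"][:80])
```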