feat(rag): implement Chain of Thought prompting in RAG agents: add use_cot parameter, enhance prompt templates, and support CoT in both agents

jasperan · jasperan · commit f082c0440fff · 2025-02-17T18:56:02.000+01:00
diff --git a/agentic_rag/local_rag_agent.py b/agentic_rag/local_rag_agent.py
@@ -29,9 +29,10 @@ class QueryAnalysis(BaseModel):
     )
 
 class LocalRAGAgent:
-    def __init__(self, vector_store: VectorStore, model_name: str = "mistralai/Mistral-7B-Instruct-v0.2"):
+    def __init__(self, vector_store: VectorStore, model_name: str = "mistralai/Mistral-7B-Instruct-v0.2", use_cot: bool = False):
         """Initialize local RAG agent with vector store and local LLM"""
         self.vector_store = vector_store
+        self.use_cot = use_cot
         
         # Load HuggingFace token from config
         try:
@@ -120,7 +121,15 @@ def _generate_direct_response(self, query: str) -> Dict[str, Any]:
         """Generate a response directly from the LLM without context"""
         logger.info("Generating direct response from LLM without context...")
         
-        prompt = f"""You are a helpful AI assistant. Please answer the following query to the best of your ability.
+        if self.use_cot:
+            prompt = f"""You are a helpful AI assistant. Please answer the following query using chain of thought reasoning.
+First break down the problem into steps, then solve each step to arrive at the final answer.
+
+Query: {query}
+
+Let's think about this step by step:"""
+        else:
+            prompt = f"""You are a helpful AI assistant. Please answer the following query to the best of your ability.
 If you're not confident about the answer, please say so.
 
 Query: {query}
@@ -199,7 +208,19 @@ def _generate_response(self, query: str, context: List[Dict[str, Any]]) -> Dict[
                                   for i, item in enumerate(context)])
         
         logger.info("Building prompt with context...")
-        prompt = f"""Answer the following query using the provided context. 
+        if self.use_cot:
+            prompt = f"""Answer the following query using the provided context and chain of thought reasoning.
+First break down the problem into steps, then use the context to solve each step and arrive at the final answer.
+If the context doesn't contain enough information to answer accurately, say so explicitly.
+
+Context:
+{context_str}
+
+Query: {query}
+
+Let's think about this step by step:"""
+        else:
+            prompt = f"""Answer the following query using the provided context. 
 If the context doesn't contain enough information to answer accurately, 
 say so explicitly.
 
@@ -225,6 +246,7 @@ def main():
     parser.add_argument("--store-path", default="embeddings", help="Path to the vector store")
     parser.add_argument("--model", default="mistralai/Mistral-7B-Instruct-v0.2", help="Model to use")
     parser.add_argument("--quiet", action="store_true", help="Disable verbose logging")
+    parser.add_argument("--use-cot", action="store_true", help="Enable Chain of Thought reasoning")
     
     args = parser.parse_args()
     
@@ -241,7 +263,7 @@ def main():
         logger.info(f"Initializing vector store from: {args.store_path}")
         store = VectorStore(persist_directory=args.store_path)
         logger.info("Initializing local RAG agent...")
-        agent = LocalRAGAgent(store, model_name=args.model)
+        agent = LocalRAGAgent(store, model_name=args.model, use_cot=args.use_cot)
         
         print(f"\nProcessing query: {args.query}")
         print("=" * 50)
diff --git a/agentic_rag/rag_agent.py b/agentic_rag/rag_agent.py
@@ -21,9 +21,10 @@ class QueryAnalysis(BaseModel):
     )
 
 class RAGAgent:
-    def __init__(self, vector_store: VectorStore, openai_api_key: str):
+    def __init__(self, vector_store: VectorStore, openai_api_key: str, use_cot: bool = False):
         """Initialize RAG agent with vector store and LLM"""
         self.vector_store = vector_store
+        self.use_cot = use_cot
         self.llm = ChatOpenAI(
             model="gpt-4-turbo-preview",
             temperature=0,
@@ -94,18 +95,30 @@ def _generate_response(self, query: str, context: List[Dict[str, Any]]) -> Dict[
         context_str = "\n\n".join([f"Context {i+1}:\n{item['content']}" 
                                   for i, item in enumerate(context)])
         
-        prompt = ChatPromptTemplate.from_template(
-            """Answer the following query using the provided context. 
-            If the context doesn't contain enough information to answer accurately, 
-            say so explicitly.
-            
-            Context:
-            {context}
-            
-            Query: {query}
-            
-            Answer:"""
-        )
+        if self.use_cot:
+            template = """Answer the following query using the provided context and chain of thought reasoning.
+First break down the problem into steps, then use the context to solve each step and arrive at the final answer.
+If the context doesn't contain enough information to answer accurately, say so explicitly.
+
+Context:
+{context}
+
+Query: {query}
+
+Let's think about this step by step:"""
+        else:
+            template = """Answer the following query using the provided context. 
+If the context doesn't contain enough information to answer accurately, 
+say so explicitly.
+
+Context:
+{context}
+
+Query: {query}
+
+Answer:"""
+        
+        prompt = ChatPromptTemplate.from_template(template)
         
         messages = prompt.format_messages(context=context_str, query=query)
         response = self.llm.invoke(messages)
@@ -119,6 +132,7 @@ def main():
     parser = argparse.ArgumentParser(description="Query documents using OpenAI GPT-4")
     parser.add_argument("--query", required=True, help="Query to process")
     parser.add_argument("--store-path", default="chroma_db", help="Path to the vector store")
+    parser.add_argument("--use-cot", action="store_true", help="Enable Chain of Thought reasoning")
     
     args = parser.parse_args()
     
@@ -135,7 +149,7 @@ def main():
     
     try:
         store = VectorStore(persist_directory=args.store_path)
-        agent = RAGAgent(store, openai_api_key=os.getenv("OPENAI_API_KEY"))
+        agent = RAGAgent(store, openai_api_key=os.getenv("OPENAI_API_KEY"), use_cot=args.use_cot)
         
         print(f"\nProcessing query: {args.query}")
         print("=" * 50)