@@ -245,126 +245,91 @@ def process_query(self, query: str) -> Dict[str, Any]:
245245 else :
246246 return self ._generate_general_response (query )
247247 else :
248- # For PDF or Repository collections, use context-based processing
248+ # For PDF, Repository, or Web collections, use context-based processing
249249 if self .use_cot :
250250 return self ._process_query_with_cot (query )
251251 else :
252252 return self ._process_query_standard (query )
253253
def _process_query_with_cot(self, query: str) -> Dict[str, Any]:
    """Process a query using Chain of Thought reasoning.

    Retrieves context for the selected collection from the vector store,
    then calls the planner, researcher, reasoner and synthesizer agents in
    that order. Each agent result is indexed with ``"answer"`` and/or
    ``"context"``, so every agent must return a mapping with those keys.

    Args:
        query: The user's question.

    Returns:
        A dict with three keys:
        ``"answer"`` - the synthesizer's answer;
        ``"reasoning_steps"`` - the answers of the planner, researcher,
        reasoner and synthesizer, in that order;
        ``"context"`` - the context reported by the synthesizer.

    Raises:
        Exception: Any error raised during retrieval or by an agent is
            logged (with traceback) and re-raised unchanged.
    """
    try:
        # Get context based on collection type; any other collection
        # (e.g. general knowledge) runs the agents without context.
        if self.collection == "PDF Collection":
            context = self.vector_store.query_pdf_collection(query)
        elif self.collection == "Repository Collection":
            context = self.vector_store.query_repo_collection(query)
        elif self.collection == "Web Knowledge Base":
            context = self.vector_store.query_web_collection(query)
        else:
            context = []

        logger.info("Retrieved %d chunks from %s", len(context), self.collection)

        # Create agents lazily on first use.
        if not self.agents:
            self.agents = create_agents(self.llm, self.vector_store)

        # Planning step
        planning_result = self.agents["planner"].plan(query, context)
        logger.info("Planning step completed")

        # Research step
        research_result = self.agents["researcher"].research(query, context)
        logger.info("Research step completed")

        # Reasoning step works on the researcher's context, not the raw one.
        reasoning_result = self.agents["reasoner"].reason(
            query, research_result["context"]
        )
        logger.info("Reasoning step completed")

        # Synthesis step combines the context of all previous steps.
        synthesis_result = self.agents["synthesizer"].synthesize(
            query,
            planning_result["context"],
            research_result["context"],
            reasoning_result["context"],
        )
        logger.info("Synthesis step completed")

        return {
            "answer": synthesis_result["answer"],
            "reasoning_steps": [
                planning_result["answer"],
                research_result["answer"],
                reasoning_result["answer"],
                synthesis_result["answer"],
            ],
            "context": synthesis_result["context"],
        }

    except Exception as e:
        # logger.exception keeps the same message but also records the
        # traceback before the error propagates to the caller.
        logger.exception("Error in CoT processing: %s", e)
        raise
337309
def _process_query_standard(self, query: str) -> Dict[str, Any]:
    """Process a query using the standard RAG approach.

    Retrieves context for the selected collection from the vector store
    and passes it, together with the query, to ``_generate_response``.

    Args:
        query: The user's question.

    Returns:
        Whatever ``self._generate_response(query, context)`` returns.

    Raises:
        Exception: Any error raised during retrieval or generation is
            logged (with traceback) and re-raised unchanged.
    """
    try:
        # Get context based on collection type; any other collection
        # yields an empty context.
        if self.collection == "PDF Collection":
            context = self.vector_store.query_pdf_collection(query)
        elif self.collection == "Repository Collection":
            context = self.vector_store.query_repo_collection(query)
        elif self.collection == "Web Knowledge Base":
            context = self.vector_store.query_web_collection(query)
        else:
            context = []

        logger.info("Retrieved %d chunks from %s", len(context), self.collection)

        # NOTE(review): an empty context is passed through as-is; confirm
        # that _generate_response handles [] sensibly.
        return self._generate_response(query, context)

    except Exception as e:
        # logger.exception keeps the same message but also records the
        # traceback before the error propagates to the caller.
        logger.exception("Error in standard processing: %s", e)
        raise
368333
369334 def _generate_text (self , prompt : str , max_length : int = 512 ) -> str :
370335 """Generate text using the local model"""
@@ -456,7 +421,7 @@ def main():
456421 parser .add_argument ("--model" , default = "mistralai/Mistral-7B-Instruct-v0.2" , help = "Model to use" )
457422 parser .add_argument ("--quiet" , action = "store_true" , help = "Disable verbose logging" )
458423 parser .add_argument ("--use-cot" , action = "store_true" , help = "Enable Chain of Thought reasoning" )
459- parser .add_argument ("--collection" , choices = ["PDF Collection" , "Repository Collection" , "General Knowledge" ],
424+ parser .add_argument ("--collection" , choices = ["PDF Collection" , "Repository Collection" , "General Knowledge" , "Web Knowledge Base" ],
460425 help = "Specify which collection to query" )
461426 parser .add_argument ("--skip-analysis" , action = "store_true" , help = "Skip query analysis step" )
462427 parser .add_argument ("--verbose" , action = "store_true" , help = "Show full content of sources" )
0 commit comments