@@ -245,126 +245,132 @@ def process_query(self, query: str) -> Dict[str, Any]:
245245 else :
246246 return self ._generate_general_response (query )
247247 else :
248- # For PDF or Repository collections, use context-based processing
248+ # For PDF, Repository, or Web collections, use context-based processing
249249 if self .use_cot :
250250 return self ._process_query_with_cot (query )
251251 else :
252252 return self ._process_query_standard (query )
253253
def _process_query_with_cot(self, query: str) -> Dict[str, Any]:
    """Answer ``query`` with the multi-agent Chain of Thought pipeline.

    The pipeline retrieves context for the selected collection and then
    runs the planner, researcher, reasoner and synthesizer agents in turn.

    Args:
        query: The user's question.

    Returns:
        A dict with the keys ``"answer"``, ``"reasoning_steps"`` and
        ``"context"``. If the reasoner or synthesizer agent is missing, or
        the planning or synthesis step fails, the result of
        ``self._generate_general_response(query)`` is returned instead.

    Raises:
        Exception: Any error raised outside the per-step handlers (for
            example by the vector-store query) is logged with its
            traceback and re-raised.
    """
    try:
        # Retrieve context for the selected collection; any other
        # collection name gets no retrieved context.
        if self.collection == "PDF Collection":
            context = self.vector_store.query_pdf_collection(query)
        elif self.collection == "Repository Collection":
            context = self.vector_store.query_repo_collection(query)
        elif self.collection == "Web Knowledge Base":
            context = self.vector_store.query_web_collection(query)
        else:
            context = []

        logger.info(f"Retrieved {len(context)} chunks from {self.collection}")

        # Create agents lazily on first use.
        if not self.agents:
            self.agents = create_agents(self.llm, self.vector_store)

        # Planning step: a failure here degrades to a general response.
        try:
            planning_result = self.agents["planner"].plan(query, context)
            logger.info("Planning step completed")
        except Exception as e:
            logger.error(f"Error in planning step: {e}")
            logger.info("Falling back to general response")
            return self._generate_general_response(query)

        # One plan step per non-blank line.
        steps = [step for step in planning_result.split("\n") if step.strip()]

        # Research step: only possible with a researcher agent and context.
        researcher = self.agents.get("researcher")
        research_results = []
        if researcher is not None and context:
            for step in steps:
                try:
                    step_research = researcher.research(query, step)
                    # Accept both a {"findings": [...]} dict and a bare
                    # list of findings.
                    if isinstance(step_research, dict):
                        findings = step_research.get("findings") or []
                    elif isinstance(step_research, list):
                        findings = step_research
                    else:
                        findings = []

                    # Log the 1-based positions of the findings that came
                    # from the retrieved context.
                    source_indices = []
                    for finding in findings:
                        try:
                            source_indices.append(context.index(finding) + 1)
                        except ValueError:
                            # Finding is not one of the context chunks.
                            continue
                    logger.info(f"Research for step: {step}\nUsing sources: {source_indices}")
                except Exception as e:
                    logger.error(f"Error during research for step '{step}': {e}")
                    findings = []
                # Append exactly once per step, whether research succeeded
                # or failed, so no step is reasoned about twice.
                research_results.append({"step": step, "findings": findings})
        else:
            # If no researcher or no context, use the steps directly.
            research_results = [{"step": step, "findings": []} for step in steps]
            logger.info("No research performed (no researcher agent or no context available)")

        # Reasoning step.
        reasoner = self.agents.get("reasoner")
        if not reasoner:
            logger.warning("No reasoner agent available, using direct response")
            return self._generate_general_response(query)

        reasoning_steps = []
        for result in research_results:
            # Steps without findings get a general-knowledge placeholder.
            findings = result["findings"] or [
                {"content": "Using general knowledge", "metadata": {"source": "General Knowledge"}}
            ]
            try:
                step_reasoning = reasoner.reason(query, result["step"], findings)
                reasoning_steps.append(step_reasoning)
                logger.info(f"Reasoning for step: {result['step']}\n{step_reasoning}")
            except Exception as e:
                logger.error(f"Error in reasoning for step '{result['step']}': {e}")
                # Keep one entry per step so the synthesizer sees the gap.
                reasoning_steps.append(f"Error in reasoning for this step: {e}")

        # Synthesis step.
        synthesizer = self.agents.get("synthesizer")
        if not synthesizer:
            logger.warning("No synthesizer agent available, using direct response")
            return self._generate_general_response(query)

        try:
            synthesis_result = synthesizer.synthesize(query, reasoning_steps)
            # Accept both an {"answer": ...} dict and a bare answer; a dict
            # without "answer" is treated as a failed synthesis.
            if isinstance(synthesis_result, dict):
                answer = synthesis_result["answer"]
            else:
                answer = synthesis_result
            logger.info("Synthesis step completed")
        except Exception as e:
            logger.error(f"Error in synthesis step: {e}")
            logger.info("Falling back to general response")
            return self._generate_general_response(query)

        return {
            "answer": answer,
            "reasoning_steps": reasoning_steps,
            "context": context,
        }

    except Exception as e:
        # logger.exception records the traceback before re-raising.
        logger.exception(f"Error in CoT processing: {e}")
        raise
337350
def _process_query_standard(self, query: str) -> Dict[str, Any]:
    """Answer ``query`` with the standard RAG approach (no Chain of Thought).

    Context is retrieved from the vector store for the selected collection
    and passed to ``self._generate_response``. When nothing is retrieved,
    the query is answered with ``self._generate_general_response``.

    Args:
        query: The user's question.

    Returns:
        The response produced by ``self._generate_response`` or
        ``self._generate_general_response``.

    Raises:
        Exception: Any error during retrieval or generation is logged with
            its traceback and re-raised.
    """
    try:
        # Retrieve context for the selected collection; any other
        # collection name gets no retrieved context.
        if self.collection == "PDF Collection":
            context = self.vector_store.query_pdf_collection(query)
        elif self.collection == "Repository Collection":
            context = self.vector_store.query_repo_collection(query)
        elif self.collection == "Web Knowledge Base":
            context = self.vector_store.query_web_collection(query)
        else:
            context = []

        logger.info(f"Retrieved {len(context)} chunks from {self.collection}")

        # With no context, answer from general knowledge rather than
        # calling the context-based generator with an empty list.
        if not context:
            logger.info("No context found, using general knowledge")
            return self._generate_general_response(query)

        return self._generate_response(query, context)

    except Exception as e:
        # logger.exception records the traceback before re-raising.
        logger.exception(f"Error in standard processing: {e}")
        raise
368374
369375 def _generate_text (self , prompt : str , max_length : int = 512 ) -> str :
370376 """Generate text using the local model"""
@@ -456,7 +462,7 @@ def main():
456462 parser .add_argument ("--model" , default = "mistralai/Mistral-7B-Instruct-v0.2" , help = "Model to use" )
457463 parser .add_argument ("--quiet" , action = "store_true" , help = "Disable verbose logging" )
458464 parser .add_argument ("--use-cot" , action = "store_true" , help = "Enable Chain of Thought reasoning" )
459- parser .add_argument ("--collection" , choices = ["PDF Collection" , "Repository Collection" , "General Knowledge" ],
465+ parser .add_argument ("--collection" , choices = ["PDF Collection" , "Repository Collection" , "General Knowledge" , "Web Knowledge Base" ],
460466 help = "Specify which collection to query" )
461467 parser .add_argument ("--skip-analysis" , action = "store_true" , help = "Skip query analysis step" )
462468 parser .add_argument ("--verbose" , action = "store_true" , help = "Show full content of sources" )
0 commit comments