@@ -245,126 +245,132 @@ def process_query(self, query: str) -> Dict[str, Any]:
245
245
else :
246
246
return self ._generate_general_response (query )
247
247
else :
248
- # For PDF or Repository collections, use context-based processing
248
+ # For PDF, Repository, or Web collections, use context-based processing
249
249
if self .use_cot :
250
250
return self ._process_query_with_cot (query )
251
251
else :
252
252
return self ._process_query_standard (query )
253
253
254
254
def _process_query_with_cot(self, query: str) -> Dict[str, Any]:
    """Process query using Chain of Thought reasoning.

    Runs four stages in order: plan, research each plan step, reason about
    each step, then synthesize a final answer.

    Args:
        query: The user question to answer.

    Returns:
        A dict with keys ``"answer"``, ``"reasoning_steps"`` and ``"context"``.
        When the planner or synthesizer call fails, or when no reasoner or
        synthesizer agent is available, the result of
        ``self._generate_general_response(query)`` is returned instead.

    Raises:
        Exception: Any error not covered by a per-stage fallback is logged
            and re-raised unchanged.
    """
    try:
        # Get context based on collection type
        if self.collection == "PDF Collection":
            context = self.vector_store.query_pdf_collection(query)
        elif self.collection == "Repository Collection":
            context = self.vector_store.query_repo_collection(query)
        elif self.collection == "Web Knowledge Base":
            context = self.vector_store.query_web_collection(query)
        else:
            context = []

        # Log number of chunks retrieved
        logger.info(f"Retrieved {len(context)} chunks from {self.collection}")

        # Create agents if not already created
        if not self.agents:
            self.agents = create_agents(self.llm, self.vector_store)

        # Planning step: a failure here degrades to a general response.
        try:
            planning_result = self.agents["planner"].plan(query, context)
            logger.info("Planning step completed")
        except Exception as e:
            logger.error(f"Error in planning step: {str(e)}")
            logger.info("Falling back to general response")
            return self._generate_general_response(query)

        # One plan step per non-blank line of the planner output.
        steps = [step for step in planning_result.split("\n") if step.strip()]

        # Research step
        research_results = []
        researcher = self.agents.get("researcher")
        if researcher is not None and context:
            for step in steps:
                try:
                    step_research = researcher.research(query, step)
                except Exception as e:
                    logger.error(f"Error during research for step '{step}': {str(e)}")
                    research_results.append({"step": step, "findings": []})
                    continue

                # Extract findings from research result
                findings = step_research.get("findings", []) if isinstance(step_research, dict) else []
                # Exactly one entry per step: the source logging below is
                # diagnostic only and must never add a second entry.
                research_results.append({"step": step, "findings": findings})

                # Log which sources were used for this step
                try:
                    source_indices = [context.index(finding) + 1 for finding in findings if finding in context]
                    logger.info(f"Research for step: {step}\nUsing sources: {source_indices}")
                except (TypeError, ValueError) as ve:
                    logger.warning(f"Could not find some findings in initial context: {str(ve)}")
        else:
            # If no researcher or no context, use the steps directly
            research_results = [{"step": step, "findings": []} for step in steps]
            logger.info("No research performed (no researcher agent or no context available)")

        # Reasoning step
        reasoning_steps = []
        reasoner = self.agents.get("reasoner")
        if not reasoner:
            logger.warning("No reasoner agent available, using direct response")
            return self._generate_general_response(query)

        for result in research_results:
            try:
                step_reasoning = reasoner.reason(
                    query,
                    result["step"],
                    # Steps without findings get a placeholder finding.
                    result["findings"] or [{"content": "Using general knowledge", "metadata": {"source": "General Knowledge"}}],
                )
            except Exception as e:
                logger.error(f"Error in reasoning for step '{result['step']}': {str(e)}")
                # Keep one entry per step so the synthesizer sees the failure.
                reasoning_steps.append(f"Error in reasoning for this step: {str(e)}")
            else:
                reasoning_steps.append(step_reasoning)
                logger.info(f"Reasoning for step: {result['step']}\n{step_reasoning}")

        # Synthesis step
        synthesizer = self.agents.get("synthesizer")
        if not synthesizer:
            logger.warning("No synthesizer agent available, using direct response")
            return self._generate_general_response(query)

        try:
            synthesis_result = synthesizer.synthesize(query, reasoning_steps)
            logger.info("Synthesis step completed")
        except Exception as e:
            logger.error(f"Error in synthesis step: {str(e)}")
            logger.info("Falling back to general response")
            return self._generate_general_response(query)

        # A synthesizer that returns the answer as a plain string is used
        # as-is; indexing a str with "answer" would raise TypeError.
        if isinstance(synthesis_result, str):
            answer = synthesis_result
        else:
            answer = synthesis_result["answer"]

        return {
            "answer": answer,
            "reasoning_steps": reasoning_steps,
            "context": context,
        }

    except Exception as e:
        logger.error(f"Error in CoT processing: {str(e)}")
        raise
337
350
338
351
def _process_query_standard(self, query: str) -> Dict[str, Any]:
    """Process query using standard RAG approach.

    Retrieves chunks for the active collection (an empty list when the
    collection is not one of the three known labels), logs how many were
    retrieved, and returns ``self._generate_response(query, chunks)``.
    Any exception is logged and re-raised unchanged.
    """
    # Collection label -> name of the vector-store method that queries it.
    # The name is resolved with getattr only for the selected collection.
    retriever_names = {
        "PDF Collection": "query_pdf_collection",
        "Repository Collection": "query_repo_collection",
        "Web Knowledge Base": "query_web_collection",
    }
    try:
        retriever_name = retriever_names.get(self.collection)
        if retriever_name is None:
            chunks = []
        else:
            chunks = getattr(self.vector_store, retriever_name)(query)

        # Log number of chunks retrieved
        logger.info(f"Retrieved {len(chunks)} chunks from {self.collection}")

        # Generate response using context
        return self._generate_response(query, chunks)

    except Exception as e:
        logger.error(f"Error in standard processing: {str(e)}")
        raise
368
374
369
375
def _generate_text (self , prompt : str , max_length : int = 512 ) -> str :
370
376
"""Generate text using the local model"""
@@ -456,7 +462,7 @@ def main():
456
462
parser .add_argument ("--model" , default = "mistralai/Mistral-7B-Instruct-v0.2" , help = "Model to use" )
457
463
parser .add_argument ("--quiet" , action = "store_true" , help = "Disable verbose logging" )
458
464
parser .add_argument ("--use-cot" , action = "store_true" , help = "Enable Chain of Thought reasoning" )
459
- parser .add_argument ("--collection" , choices = ["PDF Collection" , "Repository Collection" , "General Knowledge" ],
465
+ parser .add_argument ("--collection" , choices = ["PDF Collection" , "Repository Collection" , "General Knowledge" , "Web Knowledge Base" ],
460
466
help = "Specify which collection to query" )
461
467
parser .add_argument ("--skip-analysis" , action = "store_true" , help = "Skip query analysis step" )
462
468
parser .add_argument ("--verbose" , action = "store_true" , help = "Show full content of sources" )
0 commit comments