@@ -245,126 +245,91 @@ def process_query(self, query: str) -> Dict[str, Any]:
245
245
else :
246
246
return self ._generate_general_response (query )
247
247
else :
248
- # For PDF or Repository collections, use context-based processing
248
+ # For PDF, Repository, or Web collections, use context-based processing
249
249
if self .use_cot :
250
250
return self ._process_query_with_cot (query )
251
251
else :
252
252
return self ._process_query_standard (query )
253
253
254
254
def _process_query_with_cot(self, query: str) -> Dict[str, Any]:
    """Answer *query* by chaining the planner, researcher, reasoner and synthesizer agents.

    Context chunks are fetched from the vector store according to
    ``self.collection``; any collection name other than the three routed
    below yields an empty context list. Agents are built with
    ``create_agents`` only when ``self.agents`` is falsy.

    Args:
        query: The user query to process.

    Returns:
        A dict with three keys: ``"answer"`` (the synthesizer's answer),
        ``"reasoning_steps"`` (the answers of the planning, research,
        reasoning and synthesis stages, in that order) and ``"context"``
        (the synthesizer's context).

    Raises:
        Exception: Any error raised along the way is logged and re-raised
            unchanged.
    """
    # (collection label, vector-store method name) pairs, checked in order.
    retrieval_routes = (
        ("PDF Collection", "query_pdf_collection"),
        ("Repository Collection", "query_repo_collection"),
        ("Web Knowledge Base", "query_web_collection"),
    )

    try:
        # Pick the retrieval method that matches the active collection.
        for label, method_name in retrieval_routes:
            if self.collection == label:
                chunks = getattr(self.vector_store, method_name)(query)
                break
        else:
            # No routed collection matched: run without retrieved context.
            chunks = []

        logger.info(f"Retrieved {len(chunks)} chunks from {self.collection}")

        # Build the agents on first use.
        if not self.agents:
            self.agents = create_agents(self.llm, self.vector_store)

        # Stage 1: planning over the retrieved chunks.
        plan_out = self.agents["planner"].plan(query, chunks)
        logger.info("Planning step completed")

        # Stage 2: research over the same chunks.
        research_out = self.agents["researcher"].research(query, chunks)
        logger.info("Research step completed")

        # Stage 3: reasoning over the researcher's context.
        reasoning_out = self.agents["reasoner"].reason(query, research_out["context"])
        logger.info("Reasoning step completed")

        # Stage 4: synthesis over the context of all three earlier stages.
        synthesize = self.agents["synthesizer"].synthesize
        final_out = synthesize(
            query,
            plan_out["context"],
            research_out["context"],
            reasoning_out["context"],
        )
        logger.info("Synthesis step completed")

        stages = (plan_out, research_out, reasoning_out, final_out)
        return {
            "answer": final_out["answer"],
            "reasoning_steps": [stage["answer"] for stage in stages],
            "context": final_out["context"],
        }

    except Exception as e:
        # Record the failure here, then let the caller decide how to handle it.
        logger.error(f"Error in CoT processing: {str(e)}")
        raise
337
309
338
310
def _process_query_standard(self, query: str) -> Dict[str, Any]:
    """Answer *query* with a single retrieve-then-generate pass.

    Context chunks are fetched from the vector store according to
    ``self.collection`` (an empty list for any collection name other than
    the three routed below) and handed to ``self._generate_response``
    together with the query.

    Args:
        query: The user query to process.

    Returns:
        Whatever ``self._generate_response(query, context)`` returns.

    Raises:
        Exception: Any error raised along the way is logged and re-raised
            unchanged.
    """
    # (collection label, vector-store method name) pairs, checked in order.
    retrieval_routes = (
        ("PDF Collection", "query_pdf_collection"),
        ("Repository Collection", "query_repo_collection"),
        ("Web Knowledge Base", "query_web_collection"),
    )

    try:
        # Pick the retrieval method that matches the active collection.
        for label, method_name in retrieval_routes:
            if self.collection == label:
                chunks = getattr(self.vector_store, method_name)(query)
                break
        else:
            # No routed collection matched: generate with an empty context.
            chunks = []

        logger.info(f"Retrieved {len(chunks)} chunks from {self.collection}")

        # Generation receives the chunks even when the list is empty.
        return self._generate_response(query, chunks)

    except Exception as e:
        # Record the failure here, then let the caller decide how to handle it.
        logger.error(f"Error in standard processing: {str(e)}")
        raise
368
333
369
334
def _generate_text (self , prompt : str , max_length : int = 512 ) -> str :
370
335
"""Generate text using the local model"""
@@ -456,7 +421,7 @@ def main():
456
421
parser .add_argument ("--model" , default = "mistralai/Mistral-7B-Instruct-v0.2" , help = "Model to use" )
457
422
parser .add_argument ("--quiet" , action = "store_true" , help = "Disable verbose logging" )
458
423
parser .add_argument ("--use-cot" , action = "store_true" , help = "Enable Chain of Thought reasoning" )
459
- parser .add_argument ("--collection" , choices = ["PDF Collection" , "Repository Collection" , "General Knowledge" ],
424
+ parser .add_argument ("--collection" , choices = ["PDF Collection" , "Repository Collection" , "General Knowledge" , "Web Knowledge Base" ],
460
425
help = "Specify which collection to query" )
461
426
parser .add_argument ("--skip-analysis" , action = "store_true" , help = "Skip query analysis step" )
462
427
parser .add_argument ("--verbose" , action = "store_true" , help = "Show full content of sources" )
0 commit comments