@@ -58,6 +58,10 @@ def print_test_config(config, scorer):
5858 print (f" Chunks Enabled: { not config ['disable_chunks' ]} " )
5959 print (f" Golden Chunks: { config ['use_golden_chunks' ]} " )
6060 print (f" HyDE Enabled: { config .get ('use_hyde' , False )} " )
61+ print (f" Agent Mode: { config .get ('use_agent' , False )} " )
62+ if config .get ('use_agent' , False ):
63+ print (f" • Reasoning Limit: { config .get ('agent_reasoning_limit' , 5 )} " )
64+ print (f" • Tool Limit: { config .get ('agent_tool_limit' , 20 )} " )
6165 print (f" Output Mode: { config ['output_mode' ]} " )
6266 print (f" Metrics: { ', ' .join (active_metrics )} " )
6367 print (f"{ '=' * 60 } \n " )
@@ -147,6 +151,9 @@ def run_benchmark(benchmark, config, results_dir, scorer):
147151 "system_prompt_mode" : config ["system_prompt_mode" ],
148152 "disable_chunks" : config ["disable_chunks" ],
149153 "use_golden_chunks" : config ["use_golden_chunks" ],
154+ "use_agent" : config .get ("use_agent" , False ),
155+ "agent_reasoning_limit" : config .get ("agent_reasoning_limit" , 5 ),
156+ "agent_tool_limit" : config .get ("agent_tool_limit" , 20 ),
150157 }
151158 }
152159
@@ -173,7 +180,6 @@ def get_tokensmith_answer(question, config, golden_chunks=None):
173180 Returns:
174181 tuple: (Generated answer, chunks_info list, hyde_query)
175182 """
176- from src .main import get_answer
177183 from src .instrumentation .logging import init_logger , get_logger
178184 from src .config import RAGConfig
179185 from src .retriever import BM25Retriever , FAISSRetriever , IndexKeywordRetriever , load_artifacts
@@ -216,7 +222,9 @@ def get_tokensmith_answer(question, config, golden_chunks=None):
216222 )
217223
218224 # Print status
219- if golden_chunks and config ["use_golden_chunks" ]:
225+ if config .get ("use_agent" , False ):
226+ print (f" 🤖 Agent mode enabled" )
227+ elif golden_chunks and config ["use_golden_chunks" ]:
220228 print (f" 📌 Using { len (golden_chunks )} golden chunks" )
221229 elif config ["disable_chunks" ]:
222230 print (f" 📭 No chunks (baseline mode)" )
@@ -235,47 +243,84 @@ def get_tokensmith_answer(question, config, golden_chunks=None):
235243 index_prefix = config ["index_prefix" ]
236244 )
237245
238- retrievers = [
239- FAISSRetriever (faiss_index , cfg .embed_model ),
240- BM25Retriever (bm25_index )
241- ]
242-
243- # Add index keyword retriever if weight > 0
244- if cfg .ranker_weights .get ("index_keywords" , 0 ) > 0 :
245- retrievers .append (
246- IndexKeywordRetriever (cfg .extracted_index_path , cfg .page_to_chunk_map_path )
246+ # Check if agent mode is enabled
247+ if config .get ("use_agent" , False ):
248+ # Use agent orchestrator path
249+ from src .agent .tools import AgentToolkit
250+ from src .agent .orchestrator import AgentOrchestrator , AgentConfig
251+
252+ toolkit = AgentToolkit (
253+ faiss_index = faiss_index ,
254+ chunks = chunks ,
255+ sources = sources ,
256+ embed_model = cfg .embed_model ,
257+ markdown_path = "data/book_with_pages.md" ,
258+ summaries_path = "data/section_summaries.json" ,
247259 )
248-
249- ranker = EnsembleRanker (
250- ensemble_method = cfg .ensemble_method ,
251- weights = cfg .ranker_weights ,
252- rrf_k = int (cfg .rrf_k )
253- )
254-
255- # Package artifacts for reuse
256- artifacts = {
257- "chunks" : chunks ,
258- "sources" : sources ,
259- "retrievers" : retrievers ,
260- "ranker" : ranker
261- }
262-
263- result = get_answer (
264- question = question ,
265- cfg = cfg ,
266- args = args ,
267- logger = logger ,
268- artifacts = artifacts ,
269- console = None ,
270- golden_chunks = golden_chunks ,
271- is_test_mode = True
272- )
273-
274- # Handle return value (answer, chunks_info, hyde_query) or just answer
275- if isinstance (result , tuple ):
276- generated , chunks_info , hyde_query = result
260+
261+ agent_config = AgentConfig (
262+ reasoning_limit = cfg .agent_reasoning_limit ,
263+ tool_limit = cfg .agent_tool_limit ,
264+ max_generation_tokens = cfg .max_gen_tokens ,
265+ )
266+
267+ orchestrator = AgentOrchestrator (
268+ toolkit = toolkit ,
269+ model_path = args .model_path or cfg .model_path ,
270+ config = agent_config ,
271+ )
272+
273+ # Run agent and get answer
274+ result = orchestrator .run (question )
275+ generated = result ["answer" ]
276+ chunks_info = None # Agent mode doesn't provide chunks_info in the same format
277+ hyde_query = None
278+
277279 else :
278- generated , chunks_info , hyde_query = result , None , None
280+ # Use standard get_answer path
281+ from src .main import get_answer
282+
283+ retrievers = [
284+ FAISSRetriever (faiss_index , cfg .embed_model ),
285+ BM25Retriever (bm25_index )
286+ ]
287+
288+ # Add index keyword retriever if weight > 0
289+ if cfg .ranker_weights .get ("index_keywords" , 0 ) > 0 :
290+ retrievers .append (
291+ IndexKeywordRetriever (cfg .extracted_index_path , cfg .page_to_chunk_map_path )
292+ )
293+
294+ ranker = EnsembleRanker (
295+ ensemble_method = cfg .ensemble_method ,
296+ weights = cfg .ranker_weights ,
297+ rrf_k = int (cfg .rrf_k )
298+ )
299+
300+ # Package artifacts for reuse
301+ artifacts = {
302+ "chunks" : chunks ,
303+ "sources" : sources ,
304+ "retrievers" : retrievers ,
305+ "ranker" : ranker
306+ }
307+
308+ result = get_answer (
309+ question = question ,
310+ cfg = cfg ,
311+ args = args ,
312+ logger = logger ,
313+ artifacts = artifacts ,
314+ console = None ,
315+ golden_chunks = golden_chunks ,
316+ is_test_mode = True
317+ )
318+
319+ # Handle return value (answer, chunks_info, hyde_query) or just answer
320+ if isinstance (result , tuple ):
321+ generated , chunks_info , hyde_query = result
322+ else :
323+ generated , chunks_info , hyde_query = result , None , None
279324
280325 # Clean answer - extract up to end token if present
281326 generated = clean_answer (generated )