Skip to content

Commit 9e1ecca

Browse files
committed
Integrate agent mode into the test framework
1 parent 81a3fad commit 9e1ecca

File tree

2 files changed

+113
-41
lines changed

2 files changed

+113
-41
lines changed

tests/conftest.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,18 @@ def pytest_addoption(parser):
8989
default=None,
9090
help="System prompt mode (overrides config)"
9191
)
92+
group.addoption(
93+
"--use-agent",
94+
action="store_true",
95+
default=None,
96+
help="Enable agent mode (overrides config)"
97+
)
98+
group.addoption(
99+
"--no-agent",
100+
action="store_true",
101+
default=None,
102+
help="Disable agent mode (overrides config)"
103+
)
92104

93105
# === Testing Options ===
94106
group.addoption(
@@ -176,8 +188,23 @@ def config(pytestconfig):
176188
# Query Enhancement (HyDE)
177189
"use_hyde": cfg.get("use_hyde", False),
178190
"hyde_max_tokens": cfg.get("hyde_max_tokens", 100),
191+
192+
# Agent Mode
193+
"agent_reasoning_limit": cfg.get("agent_reasoning_limit", 5),
194+
"agent_tool_limit": cfg.get("agent_tool_limit", 20),
179195
}
180196

197+
# Handle agent mode
198+
use_agent_cli = pytestconfig.getoption("--use-agent")
199+
no_agent_cli = pytestconfig.getoption("--no-agent")
200+
201+
if use_agent_cli:
202+
merged_config["use_agent"] = True
203+
elif no_agent_cli:
204+
merged_config["use_agent"] = False
205+
else:
206+
merged_config["use_agent"] = cfg.get("use_agent", False)
207+
181208
# Handle enable/disable chunks
182209
disable_chunks_cli = pytestconfig.getoption("--disable-chunks")
183210

tests/test_benchmarks.py

Lines changed: 86 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ def print_test_config(config, scorer):
5858
print(f" Chunks Enabled: {not config['disable_chunks']}")
5959
print(f" Golden Chunks: {config['use_golden_chunks']}")
6060
print(f" HyDE Enabled: {config.get('use_hyde', False)}")
61+
print(f" Agent Mode: {config.get('use_agent', False)}")
62+
if config.get('use_agent', False):
63+
print(f" • Reasoning Limit: {config.get('agent_reasoning_limit', 5)}")
64+
print(f" • Tool Limit: {config.get('agent_tool_limit', 20)}")
6165
print(f" Output Mode: {config['output_mode']}")
6266
print(f" Metrics: {', '.join(active_metrics)}")
6367
print(f"{'='*60}\n")
@@ -147,6 +151,9 @@ def run_benchmark(benchmark, config, results_dir, scorer):
147151
"system_prompt_mode": config["system_prompt_mode"],
148152
"disable_chunks": config["disable_chunks"],
149153
"use_golden_chunks": config["use_golden_chunks"],
154+
"use_agent": config.get("use_agent", False),
155+
"agent_reasoning_limit": config.get("agent_reasoning_limit", 5),
156+
"agent_tool_limit": config.get("agent_tool_limit", 20),
150157
}
151158
}
152159

@@ -173,7 +180,6 @@ def get_tokensmith_answer(question, config, golden_chunks=None):
173180
Returns:
174181
tuple: (Generated answer, chunks_info list, hyde_query)
175182
"""
176-
from src.main import get_answer
177183
from src.instrumentation.logging import init_logger, get_logger
178184
from src.config import RAGConfig
179185
from src.retriever import BM25Retriever, FAISSRetriever, IndexKeywordRetriever, load_artifacts
@@ -216,7 +222,9 @@ def get_tokensmith_answer(question, config, golden_chunks=None):
216222
)
217223

218224
# Print status
219-
if golden_chunks and config["use_golden_chunks"]:
225+
if config.get("use_agent", False):
226+
print(f" 🤖 Agent mode enabled")
227+
elif golden_chunks and config["use_golden_chunks"]:
220228
print(f" 📌 Using {len(golden_chunks)} golden chunks")
221229
elif config["disable_chunks"]:
222230
print(f" 📭 No chunks (baseline mode)")
@@ -235,47 +243,84 @@ def get_tokensmith_answer(question, config, golden_chunks=None):
235243
index_prefix=config["index_prefix"]
236244
)
237245

238-
retrievers = [
239-
FAISSRetriever(faiss_index, cfg.embed_model),
240-
BM25Retriever(bm25_index)
241-
]
242-
243-
# Add index keyword retriever if weight > 0
244-
if cfg.ranker_weights.get("index_keywords", 0) > 0:
245-
retrievers.append(
246-
IndexKeywordRetriever(cfg.extracted_index_path, cfg.page_to_chunk_map_path)
246+
# Check if agent mode is enabled
247+
if config.get("use_agent", False):
248+
# Use agent orchestrator path
249+
from src.agent.tools import AgentToolkit
250+
from src.agent.orchestrator import AgentOrchestrator, AgentConfig
251+
252+
toolkit = AgentToolkit(
253+
faiss_index=faiss_index,
254+
chunks=chunks,
255+
sources=sources,
256+
embed_model=cfg.embed_model,
257+
markdown_path="data/book_with_pages.md",
258+
summaries_path="data/section_summaries.json",
247259
)
248-
249-
ranker = EnsembleRanker(
250-
ensemble_method=cfg.ensemble_method,
251-
weights=cfg.ranker_weights,
252-
rrf_k=int(cfg.rrf_k)
253-
)
254-
255-
# Package artifacts for reuse
256-
artifacts = {
257-
"chunks": chunks,
258-
"sources": sources,
259-
"retrievers": retrievers,
260-
"ranker": ranker
261-
}
262-
263-
result = get_answer(
264-
question=question,
265-
cfg=cfg,
266-
args=args,
267-
logger=logger,
268-
artifacts=artifacts,
269-
console=None,
270-
golden_chunks=golden_chunks,
271-
is_test_mode=True
272-
)
273-
274-
# Handle return value (answer, chunks_info, hyde_query) or just answer
275-
if isinstance(result, tuple):
276-
generated, chunks_info, hyde_query = result
260+
261+
agent_config = AgentConfig(
262+
reasoning_limit=cfg.agent_reasoning_limit,
263+
tool_limit=cfg.agent_tool_limit,
264+
max_generation_tokens=cfg.max_gen_tokens,
265+
)
266+
267+
orchestrator = AgentOrchestrator(
268+
toolkit=toolkit,
269+
model_path=args.model_path or cfg.model_path,
270+
config=agent_config,
271+
)
272+
273+
# Run agent and get answer
274+
result = orchestrator.run(question)
275+
generated = result["answer"]
276+
chunks_info = None # Agent mode doesn't provide chunks_info in the same format
277+
hyde_query = None
278+
277279
else:
278-
generated, chunks_info, hyde_query = result, None, None
280+
# Use standard get_answer path
281+
from src.main import get_answer
282+
283+
retrievers = [
284+
FAISSRetriever(faiss_index, cfg.embed_model),
285+
BM25Retriever(bm25_index)
286+
]
287+
288+
# Add index keyword retriever if weight > 0
289+
if cfg.ranker_weights.get("index_keywords", 0) > 0:
290+
retrievers.append(
291+
IndexKeywordRetriever(cfg.extracted_index_path, cfg.page_to_chunk_map_path)
292+
)
293+
294+
ranker = EnsembleRanker(
295+
ensemble_method=cfg.ensemble_method,
296+
weights=cfg.ranker_weights,
297+
rrf_k=int(cfg.rrf_k)
298+
)
299+
300+
# Package artifacts for reuse
301+
artifacts = {
302+
"chunks": chunks,
303+
"sources": sources,
304+
"retrievers": retrievers,
305+
"ranker": ranker
306+
}
307+
308+
result = get_answer(
309+
question=question,
310+
cfg=cfg,
311+
args=args,
312+
logger=logger,
313+
artifacts=artifacts,
314+
console=None,
315+
golden_chunks=golden_chunks,
316+
is_test_mode=True
317+
)
318+
319+
# Handle return value (answer, chunks_info, hyde_query) or just answer
320+
if isinstance(result, tuple):
321+
generated, chunks_info, hyde_query = result
322+
else:
323+
generated, chunks_info, hyde_query = result, None, None
279324

280325
# Clean answer - extract up to end token if present
281326
generated = clean_answer(generated)

0 commit comments

Comments
 (0)