diff --git a/.gitignore b/.gitignore index 079db83e..cc79bf6f 100644 --- a/.gitignore +++ b/.gitignore @@ -197,4 +197,4 @@ uv.lock /astabench/evals/paper_finder/tasks/paper_finder_bench/normalizer_reference.json # AI docs -CLAUDE.md +CLAUDE.md \ No newline at end of file diff --git a/astabench/solvers/sqa/formatted_perplexity.py b/astabench/solvers/sqa/formatted_perplexity.py index 599413ba..71f405dd 100644 --- a/astabench/solvers/sqa/formatted_perplexity.py +++ b/astabench/solvers/sqa/formatted_perplexity.py @@ -1,17 +1,41 @@ import logging from inspect_ai.solver import Solver, chain, solver +from inspect_ai.model import ChatMessageAssistant from astabench.solvers.sqa.format_solver import format_solver from astabench.solvers.sqa.perplexity_base import perplexity_solver logger = logging.getLogger(__name__) +@solver +def add_perplexity_references() -> Solver: + + async def solve(state, generate): + m = state.messages[-1] + if not isinstance(m, ChatMessageAssistant): + raise ValueError("The last message must be from the assistant.") + + citations = m.content[-1].citations + + references_str = '\n\n## References\n\nEach reference below is in the format "[citation ID] {title} ({url})". Use `title` as the `excerpt`' + for idx, citation in enumerate(citations): + title, url = citation.title, citation.url + references_str += f'\n[{idx + 1}] {title} ({url})' + + state.messages.append(ChatMessageAssistant(content=m.text + references_str)) + return state + + return solve @solver def formatted_solver( system_prompt: str | None = None, search_context_size: str | None = None, + reasoning_effort: str = "", + search_mode: str = "", + require_snippets: bool = True, + scorer_model: str = "google/gemini-2.5-flash-preview-05-20" ) -> Solver: chainlist = [ perplexity_solver( @@ -19,7 +43,10 @@ def formatted_solver( prompt_template="{prompt_without_formatting_instructions}", system_message=system_prompt, search_context_size=search_context_size, + reasoning_effort=reasoning_effort, + search_mode=search_mode ), - format_solver("google/gemini-2.5-flash-preview-05-20"), + add_perplexity_references(), + format_solver(scorer_model, require_snippets=require_snippets), ] return chain(chainlist) diff --git a/astabench/solvers/sqa/perplexity_base.py b/astabench/solvers/sqa/perplexity_base.py index 3a38a91f..e84e8ebe 100644 --- a/astabench/solvers/sqa/perplexity_base.py +++ b/astabench/solvers/sqa/perplexity_base.py @@ -52,7 +52,8 @@ def perplexity_solver( system_message: str | None = None, use_structured_decoding: bool = False, search_context_size: str | None = None, - search_mode: str = "academic", + reasoning_effort: str = "high", + search_mode: str = "", # Date format can be flexible (e.g., '3/1/2025', 'March 1, 2025'). search_before_date_filter: str | None = None, ) -> Solver: @@ -75,6 +76,7 @@ def perplexity_solver( search_context_size is not None or search_before_date_filter is not None or search_mode + or reasoning_effort ): extra_body = {} @@ -83,7 +85,10 @@ def perplexity_solver( "search_context_size": search_context_size } - extra_body["search_mode"] = search_mode + if search_mode: + extra_body["search_mode"] = search_mode + if reasoning_effort: + extra_body["reasoning_effort"] = reasoning_effort if search_before_date_filter is not None: extra_body["search_before_date_filter"] = search_before_date_filter diff --git a/astabench/tools/native_provider_tools.py b/astabench/tools/native_provider_tools.py index cc85cb78..930d4029 100644 --- a/astabench/tools/native_provider_tools.py +++ b/astabench/tools/native_provider_tools.py @@ -69,8 +69,8 @@ def make_native_search_tools(inserted_before: str | None = None) -> list[Tool]: return [tool] elif isinstance(model.api, PerplexityAPI): perplexity_options = { - "search_mode": "academic", - "web_search_options": {"search_context_size": "high"}, + "search_mode": "", + "web_search_options": {"search_context_size": "high", "reasoning_effort": "high"}, } if inserted_before: perplexity_options["search_before_date_filter"] = inserted_before