Skip to content

Concluding v6 deprecation cycle #1054

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,9 @@ from paperqa import Settings, ask

answer_response = ask(
"What is PaperQA2?",
settings=Settings(temperature=0.5, paper_directory="my_papers"),
settings=Settings(
temperature=0.5, agent={"index": {"paper_directory": "my_papers"}}
),
)
```

Expand All @@ -374,7 +376,9 @@ from paperqa import Settings, agent_query

answer_response = await agent_query(
query="What is PaperQA2?",
settings=Settings(temperature=0.5, paper_directory="my_papers"),
settings=Settings(
temperature=0.5, agent={"index": {"paper_directory": "my_papers"}}
),
)
```

Expand Down Expand Up @@ -469,7 +473,7 @@ from paperqa import Settings, ask
answer_response = ask(
"What is PaperQA2?",
settings=Settings(
llm="gpt-4o-mini", summary_llm="gpt-4o-mini", paper_directory="my_papers"
llm="gpt-4o-mini", summary_llm="gpt-4o-mini", agent={"index": {"paper_directory": "my_papers"}}
),
)
```
Expand Down Expand Up @@ -772,9 +776,9 @@ for ... in my_docs:
Indexes will be placed in the [home directory][home dir] by default.
This can be controlled via the `PQA_HOME` environment variable.

Indexes are made by reading files in the `Settings.paper_directory`.
Indexes are made by reading files in the `IndexSettings.paper_directory`.
By default, we recursively read from subdirectories of the paper directory,
unless disabled using `Settings.index_recursively`.
unless disabled using `IndexSettings.recurse_subdirectories`.
The paper directory is not modified in any way; it is only read from.

[home dir]: https://docs.python.org/3/library/pathlib.html#pathlib.Path.home
Expand All @@ -800,7 +804,7 @@ which also works when called on `DocDetails`.
### Reusing Index

The local search indexes are built based on a hash of the current `Settings` object.
So make sure you properly specify the `paper_directory` to your `Settings` object.
So make sure you properly specify the `paper_directory` in your `IndexSettings` object.
In general, it's advisable to:

1. Pre-build an index given a folder of papers (can take several minutes)
Expand All @@ -815,7 +819,7 @@ from paperqa.agents.search import get_directory_index


async def amain(folder_of_papers: str | os.PathLike) -> None:
settings = Settings(paper_directory=folder_of_papers)
settings = Settings(agent={"index": {"paper_directory": folder_of_papers}})

# 1. Build the index. Note an index name is autogenerated when unspecified
built_index = await get_directory_index(settings=settings)
Expand Down Expand Up @@ -947,7 +951,6 @@ will return much faster than the first query and we'll be certain the authors ma
| `agent.return_paper_metadata` | `False` | Whether to include paper title/year in search tool results. |
| `agent.search_count` | `8` | Search count. |
| `agent.timeout` | `500.0` | Timeout on agent execution (seconds). |
| `agent.should_pre_search` | `False` | Whether to run search tool before invoking agent. |
| `agent.tool_names` | `None` | Optional override on tools to provide the agent. |
| `agent.max_timesteps` | `None` | Optional upper limit on environment steps. |
| `agent.index.name` | `None` | Optional name of the index. |
Expand Down
11 changes: 1 addition & 10 deletions src/paperqa/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import warnings

from lmi import (
EmbeddingModel,
HybridEmbeddingModel,
Expand All @@ -21,17 +19,10 @@
VectorStore,
)
from paperqa.settings import Settings, get_settings
from paperqa.types import Answer, Context, Doc, DocDetails, Text
from paperqa.types import Context, Doc, DocDetails, Text
from paperqa.version import __version__

# TODO: remove after refactoring all models to avoid using _* private vars
warnings.filterwarnings(
"ignore", message="Valid config keys have changed in V2:", module="pydantic"
)


__all__ = [
"Answer",
"Context",
"Doc",
"DocDetails",
Expand Down
33 changes: 2 additions & 31 deletions src/paperqa/agents/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import pickle
import re
import sys
import warnings
import zlib
from collections import Counter
from collections.abc import AsyncIterator, Callable, Sequence
Expand Down Expand Up @@ -613,38 +612,21 @@ def progress_bar_update() -> None:
return contextlib.nullcontext(), None


async def get_directory_index( # noqa: PLR0912
index_name: str | None = None,
sync_index_w_directory: bool = True,
settings: MaybeSettings = None,
build: bool = True,
async def get_directory_index(
settings: MaybeSettings = None, build: bool = True
) -> SearchIndex:
"""
Create a Tantivy index by reading from a directory of text files.

This function only reads from the source directory; it does not edit or write to it.

Args:
index_name: Deprecated override on the name of the index. If unspecified,
the default behavior is to generate the name from the input settings.
sync_index_w_directory: Opt-out flag to sync the index (add or delete index
files) with the source paper directory.
settings: Application settings.
build: Opt-out flag (default is True) to read the contents of the source paper
directory and, if the index settings' sync_with_paper_directory is enabled, also update the index.
"""
_settings = get_settings(settings)
index_settings = _settings.agent.index
if index_name:
warnings.warn(
"The index_name argument has been moved to"
f" {type(_settings.agent.index).__name__},"
" this deprecation will conclude in version 6.",
category=DeprecationWarning,
stacklevel=2,
)
index_settings.name = index_name
del index_name

search_index = SearchIndex(
fields=[*SearchIndex.REQUIRED_FIELDS, "title", "year"],
Expand All @@ -660,17 +642,6 @@ async def get_directory_index( # noqa: PLR0912
)
return search_index

if not sync_index_w_directory:
warnings.warn(
"The sync_index_w_directory argument has been moved to"
f" {type(_settings.agent.index).__name__},"
" this deprecation will conclude in version 6.",
category=DeprecationWarning,
stacklevel=2,
)
index_settings.sync_with_paper_directory = sync_index_w_directory
del sync_index_w_directory

paper_directory = anyio.Path(index_settings.paper_directory)
manifest = await maybe_get_manifest(
filename=await index_settings.finalize_manifest_file()
Expand Down
4 changes: 2 additions & 2 deletions src/paperqa/agents/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ def default_status(state: "EnvironmentState") -> str:
class EnvironmentState(BaseModel):
"""State here contains documents and answer being populated."""

model_config = ConfigDict(extra="forbid", populate_by_name=True)
model_config = ConfigDict(extra="forbid")

docs: Docs
session: PQASession = Field(..., alias="answer")
session: PQASession
status_fn: Callable[[Self], str] | None = Field(
default=None,
description=(
Expand Down
10 changes: 6 additions & 4 deletions src/paperqa/contrib/openreview_paper_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def __init__(
password: str | None = None,
) -> None:
self.settings = settings
Path(settings.paper_directory).mkdir(parents=True, exist_ok=True)
Path(settings.agent.index.paper_directory).mkdir(parents=True, exist_ok=True)
if openreview is None:
raise ImportError(
"openreview requires the 'openreview-py' extra. Please run: `pip"
Expand Down Expand Up @@ -122,7 +122,9 @@ async def _get_relevant_papers_chunk(self, question: str, chunk: str) -> list[An

async def download_papers(self, submissions: list[Any]) -> None:
"""Download PDFs for given submissions."""
downloaded_papers = Path(self.settings.paper_directory).rglob("*.pdf")
downloaded_papers = Path(self.settings.agent.index.paper_directory).rglob(
"*.pdf"
)
downloaded_ids = [p.stem for p in downloaded_papers]
logger.info("Downloading PDFs for relevant papers.")
for submission in submissions:
Expand All @@ -136,7 +138,7 @@ async def _download_pdf(self, submission: Any) -> bool:
response = await client.get(pdf_link)
if response.status_code == httpx.codes.OK.value:
async with await anyio.open_file(
f"{self.settings.paper_directory}/{submission.id}.pdf", "wb"
f"{self.settings.agent.index.paper_directory}/{submission.id}.pdf", "wb"
) as f:
await f.write(response.content)
return True
Expand All @@ -151,7 +153,7 @@ async def aadd_docs(
) -> Docs:
if docs is None:
docs = Docs()
for doc_path in Path(self.settings.paper_directory).rglob("*.pdf"):
for doc_path in Path(self.settings.agent.index.paper_directory).rglob("*.pdf"):
sub = subs.get(doc_path.stem) if subs is not None else None
if sub:
await docs.aadd(
Expand Down
Loading