Skip to content

Commit 56ebea7

Browse files
authored
Fixed Docs.aadd giving bad docname after upgrade (#1025)
1 parent fed0f22 commit 56ebea7

File tree

2 files changed

+16
-12
lines changed

2 files changed

+16
-12
lines changed

src/paperqa/docs.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -300,10 +300,13 @@ async def aadd( # noqa: PLR0912
300300
):
301301
citation = f"Unknown, {os.path.basename(path)}, {datetime.now().year}"
302302

303-
docname = citation_to_docname(citation) if docname is None else docname
304-
docname = self._get_unique_name(docname)
305-
306-
doc = Doc(docname=docname, citation=citation, dockey=dockey)
303+
doc = Doc(
304+
docname=self._get_unique_name(
305+
citation_to_docname(citation) if docname is None else docname
306+
),
307+
citation=citation,
308+
dockey=dockey,
309+
)
307310

308311
# try to extract DOI / title from the citation
309312
if (doi is title is None) and parse_config.use_doc_details:
@@ -402,7 +405,7 @@ async def aadd( # noqa: PLR0912
402405
" to ignore this error."
403406
)
404407
if await self.aadd_texts(texts, doc, all_settings, embedding_model):
405-
return docname
408+
return doc.docname
406409
return None
407410

408411
def add_texts(

tests/test_paperqa.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -934,17 +934,17 @@ async def test_repeat_keys(stub_data_dir) -> None:
934934
@pytest.mark.asyncio
935935
async def test_pdf_reader_w_no_match_doc_details(stub_data_dir: Path) -> None:
936936
docs = Docs()
937-
await docs.aadd(
937+
docname = await docs.aadd(
938938
stub_data_dir / "paper.pdf",
939939
"Wellawatte et al, XAI Review, 2023",
940940
)
941+
(doc_details,) = docs.docs.values()
942+
assert doc_details.docname == docname, "Added name should match between details"
941943
# doc will be a DocDetails object, but nothing can be found
942944
# thus, we retain the prior citation data
943945
assert (
944-
next(iter(docs.docs.values())).citation == "Wellawatte et al, XAI Review, 2023"
945-
)
946-
assert (
947-
next(iter(docs.docs.values())).formatted_citation
946+
doc_details.citation
947+
== doc_details.formatted_citation
948948
== "Wellawatte et al, XAI Review, 2023"
949949
), "Formatted citation should be the same when no metadata is found."
950950

@@ -1016,7 +1016,7 @@ async def test_partly_embedded_texts(defer_embeddings: bool) -> None:
10161016
@pytest.mark.asyncio
10171017
async def test_pdf_reader_match_doc_details(stub_data_dir: Path) -> None:
10181018
docs = Docs()
1019-
await docs.aadd(
1019+
docname = await docs.aadd(
10201020
stub_data_dir / "paper.pdf",
10211021
"Wellawatte et al, A Perspective on Explanations of Molecular Prediction"
10221022
" Models, XAI Review, 2023",
@@ -1027,7 +1027,8 @@ async def test_pdf_reader_match_doc_details(stub_data_dir: Path) -> None:
10271027
}, # Limit to only crossref since s2 is too flaky
10281028
fields=["author", "journal", "citation_count"],
10291029
)
1030-
doc_details = next(iter(docs.docs.values()))
1030+
(doc_details,) = docs.docs.values()
1031+
assert doc_details.docname == docname, "Added name should match between details"
10311032
# Crossref is non-deterministic in its ordering for results
10321033
# thus we need to capture both possible dockeys
10331034
assert doc_details.dockey in {"d7763485f06aabde", "5300ef1d5fb960d7"}

0 commit comments

Comments
 (0)