Skip to content

Commit 66c77d6

Browse files
committed
Improved names and comments explaining how DocMetadataClient.upgrade_doc_to_doc_details works
1 parent 1e1a79d commit 66c77d6

File tree

1 file changed

+11
-12
lines changed

1 file changed

+11
-12
lines changed

src/paperqa/clients/__init__.py

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -197,16 +197,16 @@ async def bulk_query(
197197
)
198198

199199
async def upgrade_doc_to_doc_details(self, doc: Doc, **kwargs) -> DocDetails:
200-
201-
# note we have some extra fields which may have come from reading the doc text,
202-
# but aren't in the doc object, we add them here too.
203-
extra_fields = {
200+
# Collect fields (e.g. title, DOI, or authors) that have been externally
201+
# specified (e.g. by a caller, or inferred from the document's contents)
202+
# but are not on the input `doc` object
203+
provided_fields = {
204204
k: v for k, v in kwargs.items() if k in set(DocDetails.model_fields)
205205
}
206-
# abuse our doc_details object to be an int if it's empty
207-
# our __add__ operation supports int by doing nothing
208-
extra_doc: int | DocDetails = (
209-
0 if not extra_fields else DocDetails(**extra_fields)
206+
# DocDetails.__add__ supports `int` as a no-op route, so if we have no
207+
# provided fields, let's use that no-op route
208+
provided_doc_details: int | DocDetails = (
209+
0 if not provided_fields else DocDetails(**provided_fields)
210210
)
211211

212212
if doc_details := await self.query(**kwargs):
@@ -222,9 +222,8 @@ async def upgrade_doc_to_doc_details(self, doc: Doc, **kwargs) -> DocDetails:
222222
doc_details.key = doc.docname
223223
if "citation" in doc.fields_to_overwrite_from_metadata:
224224
doc_details.citation = doc.citation
225-
return extra_doc + doc_details
225+
return provided_doc_details + doc_details
226226

227227
# if we can't get metadata, just return the doc, but don't overwrite any fields
228-
prior_doc = doc.model_dump()
229-
prior_doc["fields_to_overwrite_from_metadata"] = set()
230-
return DocDetails(**(prior_doc | extra_fields))
228+
orig_fields = doc.model_dump() | {"fields_to_overwrite_from_metadata": set()}
229+
return DocDetails(**(orig_fields | provided_fields))

0 commit comments

Comments
 (0)