Skip to content

Commit 6b3de75

Browse files
committed
Improved names and comments explaining how DocMetadataClient.upgrade_doc_to_doc_details works
1 parent 42dc1e9 commit 6b3de75

File tree

1 file changed

+11
-12
lines changed

1 file changed

+11
-12
lines changed

src/paperqa/clients/__init__.py

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -208,16 +208,16 @@ async def bulk_query(
208208
)
209209

210210
async def upgrade_doc_to_doc_details(self, doc: Doc, **kwargs) -> DocDetails:
211-
212-
# note we have some extra fields which may have come from reading the doc text,
213-
# but aren't in the doc object, we add them here too.
214-
extra_fields = {
211+
# Collect fields (e.g. title, DOI, or authors) that have been externally
212+
# specified (e.g. by a caller, or inferred from the document's contents)
213+
# but are not on the input `doc` object
214+
provided_fields = {
215215
k: v for k, v in kwargs.items() if k in set(DocDetails.model_fields)
216216
}
217-
# abuse our doc_details object to be an int if it's empty
218-
# our __add__ operation supports int by doing nothing
219-
extra_doc: int | DocDetails = (
220-
0 if not extra_fields else DocDetails(**extra_fields)
217+
# DocDetails.__add__ supports `int` as a no-op route, so if we have no
218+
# provided fields, let's use that no-op route
219+
provided_doc_details: int | DocDetails = (
220+
0 if not provided_fields else DocDetails(**provided_fields)
221221
)
222222

223223
if doc_details := await self.query(**kwargs):
@@ -233,9 +233,8 @@ async def upgrade_doc_to_doc_details(self, doc: Doc, **kwargs) -> DocDetails:
233233
doc_details.key = doc.docname
234234
if "citation" in doc.fields_to_overwrite_from_metadata:
235235
doc_details.citation = doc.citation
236-
return extra_doc + doc_details
236+
return provided_doc_details + doc_details
237237

238238
# if we can't get metadata, just return the doc, but don't overwrite any fields
239-
prior_doc = doc.model_dump()
240-
prior_doc["fields_to_overwrite_from_metadata"] = set()
241-
return DocDetails(**(prior_doc | extra_fields))
239+
orig_fields = doc.model_dump() | {"fields_to_overwrite_from_metadata": set()}
240+
return DocDetails(**(orig_fields | provided_fields))

0 commit comments

Comments
 (0)