@@ -208,16 +208,16 @@ async def bulk_query(
208
208
)
209
209
210
210
async def upgrade_doc_to_doc_details (self , doc : Doc , ** kwargs ) -> DocDetails :
211
-
212
- # note we have some extra fields which may have come from reading the doc text,
213
- # but aren't in the doc object, we add them here too.
214
- extra_fields = {
211
+ # Collect fields (e.g. title, DOI, or authors) that have been externally
212
+ # specified (e.g. by a caller, or inferred from the document's contents)
213
+ # but are not on the input `doc` object
214
+ provided_fields = {
215
215
k : v for k , v in kwargs .items () if k in set (DocDetails .model_fields )
216
216
}
217
- # abuse our doc_details object to be an int if it's empty
218
- # our __add__ operation supports int by doing nothing
219
- extra_doc : int | DocDetails = (
220
- 0 if not extra_fields else DocDetails (** extra_fields )
217
+ # DocDetails.__add__ supports `int` as a no-op route, so if we have no
218
+ # provided fields, let's use that no-op route
219
+ provided_doc_details : int | DocDetails = (
220
+ 0 if not provided_fields else DocDetails (** provided_fields )
221
221
)
222
222
223
223
if doc_details := await self .query (** kwargs ):
@@ -233,9 +233,8 @@ async def upgrade_doc_to_doc_details(self, doc: Doc, **kwargs) -> DocDetails:
233
233
doc_details .key = doc .docname
234
234
if "citation" in doc .fields_to_overwrite_from_metadata :
235
235
doc_details .citation = doc .citation
236
- return extra_doc + doc_details
236
+ return provided_doc_details + doc_details
237
237
238
238
# if we can't get metadata, just return the doc, but don't overwrite any fields
239
- prior_doc = doc .model_dump ()
240
- prior_doc ["fields_to_overwrite_from_metadata" ] = set ()
241
- return DocDetails (** (prior_doc | extra_fields ))
239
+ orig_fields = doc .model_dump () | {"fields_to_overwrite_from_metadata" : set ()}
240
+ return DocDetails (** (orig_fields | provided_fields ))
0 commit comments