@@ -197,16 +197,16 @@ async def bulk_query(
197
197
)
198
198
199
199
async def upgrade_doc_to_doc_details (self , doc : Doc , ** kwargs ) -> DocDetails :
200
-
201
- # note we have some extra fields which may have come from reading the doc text,
202
- # but aren't in the doc object, we add them here too.
203
- extra_fields = {
200
+ # Collect fields (e.g. title, DOI, or authors) that have been externally
201
+ # specified (e.g. by a caller, or inferred from the document's contents)
202
+ # but are not on the input `doc` object.
203
+ provided_fields = {
204
204
k : v for k , v in kwargs .items () if k in set (DocDetails .model_fields )
205
205
}
206
- # abuse our doc_details object to be an int if it's empty
207
- # our __add__ operation supports int by doing nothing
208
- extra_doc : int | DocDetails = (
209
- 0 if not extra_fields else DocDetails (** extra_fields )
206
+ # DocDetails.__add__ supports `int` as a no-op route, so if we have no
207
+ # provided fields, let's use that no-op route
208
+ provided_doc_details : int | DocDetails = (
209
+ 0 if not provided_fields else DocDetails (** provided_fields )
210
210
)
211
211
212
212
if doc_details := await self .query (** kwargs ):
@@ -222,9 +222,8 @@ async def upgrade_doc_to_doc_details(self, doc: Doc, **kwargs) -> DocDetails:
222
222
doc_details .key = doc .docname
223
223
if "citation" in doc .fields_to_overwrite_from_metadata :
224
224
doc_details .citation = doc .citation
225
- return extra_doc + doc_details
225
+ return provided_doc_details + doc_details
226
226
227
227
# if we can't get metadata, just return the doc, but don't overwrite any fields
228
- prior_doc = doc .model_dump ()
229
- prior_doc ["fields_to_overwrite_from_metadata" ] = set ()
230
- return DocDetails (** (prior_doc | extra_fields ))
228
+ orig_fields = doc .model_dump () | {"fields_to_overwrite_from_metadata" : set ()}
229
+ return DocDetails (** (orig_fields | provided_fields ))
0 commit comments