@@ -88,25 +88,28 @@ class JournalQuery(ClientQuery):
88
88
89
89
90
90
class MetadataProvider (ABC , Generic [ClientQueryType ]):
91
- """Provide metadata from a query by any means necessary."""
91
+ """Provide metadata from a query by any means necessary.
92
+
93
+ An example is going from a DOI to full paper metadata using Semantic Scholar.
94
+ """
92
95
93
96
async def query (self , query : dict ) -> DocDetails | None :
94
- return await self ._query (self .query_transformer (query ))
97
+ return await self ._query (self .query_factory (query ))
95
98
96
99
@abstractmethod
97
100
async def _query (self , query : ClientQueryType ) -> DocDetails | None :
98
- pass
101
+ """Run a query against the provider."""
99
102
100
103
@abstractmethod
101
- def query_transformer (self , query : dict ) -> ClientQueryType :
102
- pass
104
+ def query_factory (self , query : dict ) -> ClientQueryType :
105
+ """Create a query object from unstructured query data."""
103
106
104
107
105
108
class DOIOrTitleBasedProvider (MetadataProvider [DOIQuery | TitleAuthorQuery ]):
106
109
107
110
async def query (self , query : dict ) -> DocDetails | None :
108
111
try :
109
- client_query = self .query_transformer (query )
112
+ client_query = self .query_factory (query )
110
113
return await self ._query (client_query )
111
114
# We allow graceful failures, i.e. return "None" for both DOI errors and timeout errors
112
115
# DOINotFoundError means the paper doesn't exist in the source, the timeout is to prevent
@@ -150,7 +153,7 @@ async def _query(self, query: DOIQuery | TitleAuthorQuery) -> DocDetails | None:
150
153
TimeoutError: When the request takes too long on the client side
151
154
"""
152
155
153
- def query_transformer (self , query : dict ) -> DOIQuery | TitleAuthorQuery :
156
+ def query_factory (self , query : dict ) -> DOIQuery | TitleAuthorQuery :
154
157
try :
155
158
if "doi" in query :
156
159
return DOIQuery (** query )
@@ -169,7 +172,6 @@ class MetadataPostProcessor(ABC, Generic[ClientQueryType]):
169
172
170
173
MetadataPostProcessor should be idempotent and not order-dependent, i.e.
171
174
all MetadataPostProcessor instances should be able to run in parallel.
172
-
173
175
"""
174
176
175
177
async def process (self , doc_details : DocDetails , ** kwargs ) -> DocDetails :
0 commit comments