File tree Expand file tree Collapse file tree 3 files changed +12
-1
lines changed
Expand file tree Collapse file tree 3 files changed +12
-1
lines changed Original file line number Diff line number Diff line change 1313 runs-on : ubuntu-latest
1414 steps :
1515 - uses : actions/checkout@v4
16+ - name : Persist requests-cache's cache file
17+ uses : actions/cache@v4
18+ with :
19+ key : http_cache
20+ path : http_cache.sqlite
1621 - uses : actions/setup-python@v5
1722 with :
1823 python-version : ' 3.13'
Original file line number Diff line number Diff line change @@ -8,6 +8,7 @@ authors = [
88]
99dependencies = [
1010 " requests" ,
11+ " requests-cache" ,
1112 " PyYAML" ,
1213]
1314description = " Parse publications from ResearchFish API and produces the files needed to update Earlham Institute's website and CKAN."
Original file line number Diff line number Diff line change 2525 Response ,
2626 Session ,
2727)
28+ from requests_cache import CachedSession
2829
2930from .util import (
3031 extend_list_to_size ,
4243REQUEST_TIMEOUT = 5.0
4344REQUEST_RETRIES = 3
4445REQUEST_RETRIES_BACKOFF_FACTOR = 1.0
46+ # How long (in seconds) to keep a cached response when the response has no Cache-Control headers
47+ CACHED_RESPONSE_EXPIRE_AFTER = 7 * 24 * 60 * 60
4548BASE_CR_URL = "https://api.crossref.org"
4649BASE_DC_URL = "https://api.datacite.org"
4750BASE_DOI_URL = "https://doi.org"
@@ -248,6 +251,8 @@ def get_url(
248251 sleep (backoff_time )
249252 else :
250253 raise Exception (f"Failed too many times to get URL { url } " )
254+ if isinstance (s , CachedSession ):
255+ log .debug ("URL %s retrieved from cache: %s" , url , r .from_cache ) # type:ignore[attr-defined]
251256 return r
252257
253258
@@ -567,7 +572,7 @@ def main() -> None:
567572 cr_headers = {
568573 "User-Agent" : f"rfparser/{ __version__ } (https://github.com/TGAC/rfparser; mailto:{ config ['email' ]} )" ,
569574 }
570- unpaywall_session = Session ( )
575+ unpaywall_session = CachedSession ( expire_after = CACHED_RESPONSE_EXPIRE_AFTER , cache_control = True )
571576 for doi , pub in pubs_with_doi .items ():
572577 pub ["metadata_ok" ] = False
573578 if doi in BROKEN_DOI_TO_REASON :
You can’t perform that action at this time.
0 commit comments