Skip to content

Commit 80a31e5

Browse files
author
Marcin Kardas
committed
Retry grobid request when service unavailable
1 parent 01cda5c commit 80a31e5

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

sota_extractor2/data/references.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import json
77
import regex as re
88
from unidecode import unidecode
9+
import time
910
import requests
1011
import shelve
1112
import xmltodict
@@ -45,9 +46,11 @@ def to_normal_dict(d):
4546
return d
4647

4748
class GrobidClient:
48-
def __init__(self, cache_path=None, host='127.0.0.1', port=8070):
49+
def __init__(self, cache_path=None, host='127.0.0.1', port=8070, max_tries=4, retry_wait=2):
4950
self.host = host
5051
self.port = port
52+
self.max_tries = max(max_tries, 1)
53+
self.retry_wait = retry_wait
5154
self.cache_path_shelve = Path.home()/'.cache'/'refs' /'gobrid'/'gobrid.pkl' if cache_path is None else Path(cache_path)
5255
self.cache_path = Path.home() / '.cache' / 'refs' /'gobrid' / 'gobrid.db' if cache_path is None else Path(cache_path)
5356
self.cache = None
@@ -69,11 +72,22 @@ def migrate(self):
6972
old_cache.close()
7073
return count
7174

75+
def _post(self, data):
    """POST `data` to the GROBID `processCitation` endpoint, retrying on HTTP 503.

    The request is attempted up to `self.max_tries` times. Any response whose
    status code is not 503 is returned as-is, including other error statuses.
    Between consecutive attempts the method sleeps for `self.retry_wait`
    seconds; it does not sleep after the final attempt.

    Args:
        data: form payload passed to `requests.post` as `data`.

    Returns:
        The first response whose status code is not 503.

    Raises:
        ConnectionRefusedError: if every attempt was answered with 503. The
            message is the reason phrase of the last response.
    """
    attempts_left = self.max_tries
    while attempts_left > 0:
        endpoint = f'http://{self.host}:{self.port}/api/processCitation'
        response = requests.post(endpoint, data=data)
        if response.status_code != 503:
            return response
        attempts_left -= 1
        # Service unavailable: pause only if another attempt will follow.
        if attempts_left > 0:
            time.sleep(self.retry_wait)
    raise ConnectionRefusedError(response.reason)
85+
7286
def parse_ref_str_to_tei_dict(self, ref_str):
7387
cache = self.get_cache()
7488
d = cache.get(ref_str)
7589
if d is None: # potential multiple recomputation in multithreading case
76-
r = requests.post(f'http://{self.host}:{self.port}/api/processCitation', data={'citations': ref_str})
90+
r = self._post(data={'citations': ref_str})
7791
d = xmltodict.parse(r.content.decode("utf-8"))
7892
d = to_normal_dict(d)
7993
cache[ref_str] = d

0 commit comments

Comments
 (0)