6
6
import json
7
7
import regex as re
8
8
from unidecode import unidecode
9
+ import time
9
10
import requests
10
11
import shelve
11
12
import xmltodict
@@ -45,9 +46,11 @@ def to_normal_dict(d):
45
46
return d
46
47
47
48
class GrobidClient :
48
- def __init__ (self , cache_path = None , host = '127.0.0.1' , port = 8070 ):
49
+ def __init__ (self , cache_path = None , host = '127.0.0.1' , port = 8070 , max_tries = 4 , retry_wait = 2 ):
49
50
self .host = host
50
51
self .port = port
52
+ self .max_tries = max (max_tries , 1 )
53
+ self .retry_wait = retry_wait
51
54
self .cache_path_shelve = Path .home ()/ '.cache' / 'refs' / 'gobrid' / 'gobrid.pkl' if cache_path is None else Path (cache_path )
52
55
self .cache_path = Path .home () / '.cache' / 'refs' / 'gobrid' / 'gobrid.db' if cache_path is None else Path (cache_path )
53
56
self .cache = None
@@ -69,11 +72,22 @@ def migrate(self):
69
72
old_cache .close ()
70
73
return count
71
74
75
def _post(self, data):
    """POST ``data`` to the citation-processing endpoint, retrying on HTTP 503.

    The request is sent to ``/api/processCitation`` on ``self.host`` /
    ``self.port``. A 503 response is retried (presumably the service is
    temporarily busy -- confirm against the server's documentation); any
    other status code is handed back to the caller unchanged.

    Args:
        data: Form payload forwarded to ``requests.post``.

    Returns:
        The first response whose status code is not 503.

    Raises:
        ConnectionRefusedError: If every attempt was answered with 503.
            The message is the ``reason`` of the last response.
    """
    # Build the URL once; it does not change between attempts.
    url = f'http://{self.host}:{self.port}/api/processCitation'
    # Always make at least one request so that ``response`` is bound when
    # the final ``raise`` is reached, even if ``max_tries`` was set below 1
    # after construction.
    attempts = max(self.max_tries, 1)
    for attempt in range(1, attempts + 1):
        response = requests.post(url, data=data)
        if response.status_code != 503:
            return response
        # Wait only between attempts, not after the last one.
        if attempt < attempts:
            time.sleep(self.retry_wait)
    raise ConnectionRefusedError(response.reason)
85
+
72
86
def parse_ref_str_to_tei_dict (self , ref_str ):
73
87
cache = self .get_cache ()
74
88
d = cache .get (ref_str )
75
89
if d is None : # potential multiple recomputation in multithreading case
76
- r = requests . post ( f'http:// { self .host } : { self . port } /api/processCitation' , data = {'citations' : ref_str })
90
+ r = self ._post ( data = {'citations' : ref_str })
77
91
d = xmltodict .parse (r .content .decode ("utf-8" ))
78
92
d = to_normal_dict (d )
79
93
cache [ref_str ] = d
0 commit comments