Skip to content

Commit dc3050c

Browse files
author
Chris Park
committed
publish 0.5.7
1 parent 7684442 commit dc3050c

File tree

2 files changed

+75
-670
lines changed

2 files changed

+75
-670
lines changed

rosette/api.py

Lines changed: 75 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22

33
"""
44
Python client for the Rosette API.
5+
56
Copyright (c) 2014-2015 Basis Technology Corporation.
7+
68
Licensed under the Apache License, Version 2.0 (the "License");
79
you may not use this file except in compliance with the License.
810
You may obtain a copy of the License at
911
http://www.apache.org/licenses/LICENSE-2.0
12+
1013
Unless required by applicable law or agreed to in writing, software
1114
distributed under the License is distributed on an "AS IS" BASIS,
1215
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -21,6 +24,8 @@
2124
import logging
2225
import sys
2326
import pprint
27+
import time
28+
from socket import gethostbyname, gaierror
2429
from datetime import datetime
2530

2631
_ACCEPTABLE_SERVER_VERSION = "0.5"
@@ -31,7 +36,7 @@
3136
CONNECTION_TYPE = ""
3237
CONNECTION_START = datetime.now()
3338
CONNECTION_REFRESH_DURATION = 86400
34-
39+
N_RETRIES = 3
3540

3641
_IsPy3 = sys.version_info[0] == 3
3742

@@ -96,30 +101,55 @@ def _retrying_request(op, url, data, headers):
96101
code = "unknownError"
97102
rdata = None
98103
for i in range(N_RETRIES + 1):
99-
HTTP_CONNECTION.request(op, url, data, headers)
100-
response = HTTP_CONNECTION.getresponse()
101-
status = response.status
102-
rdata = response.read()
103-
if status < 500:
104-
if not REUSE_CONNECTION:
105-
HTTP_CONNECTION.close()
106-
return rdata, status
107-
if rdata is not None:
108-
try:
109-
the_json = _my_loads(rdata)
110-
if "message" in the_json:
111-
message = the_json["message"]
112-
if "code" in the_json:
113-
code = the_json["code"]
114-
except:
115-
pass
104+
# Try to connect with the Rosette API server
105+
# 500 errors will store a message and code
106+
try:
107+
HTTP_CONNECTION.request(op, url, data, headers)
108+
response = HTTP_CONNECTION.getresponse()
109+
status = response.status
110+
rdata = response.read()
111+
if status < 500:
112+
if not REUSE_CONNECTION:
113+
HTTP_CONNECTION.close()
114+
return rdata, status
115+
if rdata is not None:
116+
try:
117+
the_json = _my_loads(rdata)
118+
if "message" in the_json:
119+
message = the_json["message"]
120+
if "code" in the_json:
121+
code = the_json["code"]
122+
except:
123+
pass
124+
# If there are issues connecting to the API server,
125+
# try to regenerate the connection as long as there are
126+
# still retries left.
127+
# A short sleep delay occurs (similar to google reconnect)
128+
# in case the problem was a temporary one.
129+
except (httplib.BadStatusLine, gaierror) as e:
130+
totalTime = CONNECTION_REFRESH_DURATION
131+
if i == N_RETRIES - 1:
132+
raise RosetteException("ConnectionError", "Unable to establish connection to the Rosette API server", url)
133+
else:
134+
if not REUSE_CONNECTION or HTTP_CONNECTION is None or totalTime >= CONNECTION_REFRESH_DURATION:
135+
time.sleep(min(5 * (i + 1) * (i + 1), 300))
136+
parsed = urlparse.urlparse(url)
137+
loc = parsed.netloc
138+
CONNECTION_TYPE = parsed.scheme
139+
CONNECTION_START = datetime.now()
140+
if parsed.scheme == "https":
141+
HTTP_CONNECTION = httplib.HTTPSConnection(loc)
142+
else:
143+
HTTP_CONNECTION = httplib.HTTPConnection(loc)
144+
116145
# Do not wait to retry -- the model is that a bunch of dynamically-routed
117146
# resources has failed -- Retry means some other set of servlets and their
118147
# underlings will be called up, and maybe they'll do better.
119148
# This will not help with a persistent or impassable delay situation,
120149
# but the former case is thought to be more likely.
150+
121151
if not REUSE_CONNECTION:
122-
HTTP_CONECTION.close()
152+
HTTP_CONNECTION.close()
123153

124154
if message is None:
125155
message = "A retryable network operation has not succeeded after " + str(N_RETRIES) + " attempts"
@@ -157,6 +187,7 @@ def add_query(orig_url, key, value):
157187

158188
class RosetteException(Exception):
159189
"""Exception thrown by all Rosette API operations for errors local and remote.
190+
160191
TBD. Right now, the only valid operation is conversion to __str__.
161192
"""
162193

@@ -273,7 +304,9 @@ class DocumentParameters(_DocumentParamSetBase):
273304
convenience instance methods L{DocumentParameters.load_document_file}
274305
and L{DocumentParameters.load_document_string}. The unit size and
275306
data format are defaulted to L{InputUnit.DOC} and L{DataFormat.SIMPLE}.
307+
276308
Using subscripts instead of instance variables facilitates diagnosis.
309+
277310
If the field C{contentUri} is set to the URL of a web page (only
278311
protocols C{http, https, ftp, ftps} are accepted), the server will
279312
fetch the content from that web page. In this case, neither C{content}
@@ -352,13 +385,21 @@ class NameTranslationParameters(_DocumentParamSetBase):
352385
All are optional except C{name} and C{targetLanguage}. Scripts are in
353386
ISO15924 codes, and languages in ISO639 (two- or three-letter) codes. See the Name Translation documentation for
354387
more description of these terms, as well as the content of the return result.
388+
355389
C{name} The name to be translated.
390+
356391
C{targetLanguage} The language into which the name is to be translated.
392+
357393
C{entityType} The entity type (TBD) of the name.
394+
358395
C{sourceLanguageOfOrigin} The language of origin of the name.
396+
359397
C{sourceLanguageOfUse} The language of use of the name.
398+
360399
C{sourceScript} The script in which the name is supplied.
400+
361401
C{targetScript} The script into which the name should be translated.
402+
362403
C{targetScheme} The transliteration scheme by which the translated name should be rendered.
363404
"""
364405

@@ -376,12 +417,19 @@ def validate(self):
376417
class NameMatchingParameters(_DocumentParamSetBase):
377418
"""Parameter object for C{matched_name} endpoint.
378419
All are required.
420+
379421
C{name1} The name to be matched, a C{name} object.
422+
380423
C{name2} The name to be matched, a C{name} object.
424+
381425
The C{name} object contains these fields:
426+
382427
C{text} Text of the name, required.
428+
383429
C{language} Language of the name in ISO639 three-letter code, optional.
430+
384431
C{script} The ISO15924 code of the name, optional.
432+
385433
C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional.
386434
"""
387435

@@ -402,9 +450,11 @@ class EndpointCaller:
402450
of the Rosette server, specified at its creation. Use the specific
403451
instance methods of the L{API} object to create L{EndpointCaller} objects bound to
404452
corresponding endpoints.
453+
405454
Use L{EndpointCaller.ping} to ping, and L{EndpointCaller.info} to retrieve server info.
406455
For all other types of requests, use L{EndpointCaller.call}, which accepts
407456
an argument specifying the data to be processed and certain metadata.
457+
408458
The results of all operations are returned as python dictionaries, whose
409459
keys and values correspond exactly to those of the corresponding
410460
JSON return value described in the Rosette web service documentation.
@@ -490,9 +540,11 @@ def call(self, parameters):
490540
endpoints except C{translated_name} and C{matched_name}, it must be a L{DocumentParameters}
491541
object; for C{translated_name}, it must be an L{NameTranslationParameters} object;
492542
for C{matched_name}, it must be an L{NameMatchingParameters} object.
543+
493544
In all cases, the result is returned as a python dictionary
494545
conforming to the JSON object described in the endpoint's entry
495546
in the Rosette web service documentation.
547+
496548
@param parameters: An object specifying the data,
497549
and possible metadata, to be processed by the endpoint. See the
498550
details for those object types.
@@ -515,6 +567,9 @@ def call(self, parameters):
515567
headers["user_key"] = self.user_key
516568
headers['Content-Type'] = "application/json"
517569
r = _post_http(url, params_to_serialize, headers)
570+
# pprint.pprint(headers)
571+
# pprint.pprint(url)
572+
# pprint.pprint(params_to_serialize)
518573
return self.__finish_result(r, "operate")
519574

520575

@@ -546,7 +601,7 @@ def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1',
546601

547602
if (retries < 1):
548603
retries = 1
549-
if refresh_duration < 60:
604+
if (refresh_duration < 60):
550605
refresh_duration = 60
551606
N_RETRIES = retries
552607
REUSE_CONNECTION = reuse_connection

0 commit comments

Comments
 (0)