Skip to content

Commit 902f8b9

Browse files
author
Chris Park
committed
publish 0.5.6
1 parent 82f9bf6 commit 902f8b9

File tree

2 files changed

+694
-37
lines changed

2 files changed

+694
-37
lines changed

rosette/api.py

Lines changed: 44 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,11 @@
22

33
"""
44
Python client for the Rosette API.
5-
65
Copyright (c) 2014-2015 Basis Technology Corporation.
7-
86
Licensed under the Apache License, Version 2.0 (the "License");
97
you may not use this file except in compliance with the License.
108
You may obtain a copy of the License at
119
http://www.apache.org/licenses/LICENSE-2.0
12-
1310
Unless required by applicable law or agreed to in writing, software
1411
distributed under the License is distributed on an "AS IS" BASIS,
1512
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -24,10 +21,16 @@
2421
import logging
2522
import sys
2623
import pprint
24+
from datetime import datetime
2725

2826
_ACCEPTABLE_SERVER_VERSION = "0.5"
2927
_GZIP_BYTEARRAY = bytearray([0x1F, 0x8b, 0x08])
30-
N_RETRIES = 1
28+
N_RETRIES = 3
29+
HTTP_CONNECTION = None
30+
REUSE_CONNECTION = True
31+
CONNECTION_TYPE = ""
32+
CONNECTION_START = datetime.now()
33+
CONNECTION_REFRESH_DURATION = 86400
3134

3235

3336
_IsPy3 = sys.version_info[0] == 3
@@ -67,22 +70,39 @@ def _my_loads(obj):
6770

6871

6972
def _retrying_request(op, url, data, headers):
73+
global HTTP_CONNECTION
74+
global REUSE_CONNECTION
75+
global CONNECTION_TYPE
76+
global CONNECTION_START
77+
global CONNECTION_REFRESH_DURATION
78+
79+
timeDelta = datetime.now() - CONNECTION_START
80+
totalTime = timeDelta.days * 86400 + timeDelta.seconds
81+
parsed = urlparse.urlparse(url)
82+
if parsed.scheme != CONNECTION_TYPE:
83+
totalTime = CONNECTION_REFRESH_DURATION
84+
85+
if not REUSE_CONNECTION or HTTP_CONNECTION is None or totalTime >= CONNECTION_REFRESH_DURATION:
86+
parsed = urlparse.urlparse(url)
87+
loc = parsed.netloc
88+
CONNECTION_TYPE = parsed.scheme
89+
CONNECTION_START = datetime.now()
90+
if parsed.scheme == "https":
91+
HTTP_CONNECTION = httplib.HTTPSConnection(loc)
92+
else:
93+
HTTP_CONNECTION = httplib.HTTPConnection(loc)
94+
7095
message = None
7196
code = "unknownError"
72-
parsed = urlparse.urlparse(url)
73-
loc = parsed.netloc
74-
if parsed.scheme == "https":
75-
conn = httplib.HTTPSConnection(loc)
76-
else:
77-
conn = httplib.HTTPConnection(loc)
7897
rdata = None
7998
for i in range(N_RETRIES + 1):
80-
conn.request(op, url, data, headers)
81-
response = conn.getresponse()
99+
HTTP_CONNECTION.request(op, url, data, headers)
100+
response = HTTP_CONNECTION.getresponse()
82101
status = response.status
83102
rdata = response.read()
84103
if status < 500:
85-
conn.close()
104+
if not REUSE_CONNECTION:
105+
HTTP_CONNECTION.close()
86106
return rdata, status
87107
if rdata is not None:
88108
try:
@@ -93,12 +113,13 @@ def _retrying_request(op, url, data, headers):
93113
code = the_json["code"]
94114
except:
95115
pass
96-
conn.close()
97116
# Do not wait to retry -- the model is that a bunch of dynamically-routed
98117
# resources has failed -- Retry means some other set of servlets and their
99118
# underlings will be called up, and maybe they'll do better.
100119
# This will not help with a persistent or impassable delay situation,
101120
# but the former case is thought to be more likely.
121+
if not REUSE_CONNECTION:
122+
HTTP_CONECTION.close()
102123

103124
if message is None:
104125
message = "A retryable network operation has not succeeded after " + str(N_RETRIES) + " attempts"
@@ -136,7 +157,6 @@ def add_query(orig_url, key, value):
136157

137158
class RosetteException(Exception):
138159
"""Exception thrown by all Rosette API operations for errors local and remote.
139-
140160
TBD. Right now, the only valid operation is conversion to __str__.
141161
"""
142162

@@ -253,9 +273,7 @@ class DocumentParameters(_DocumentParamSetBase):
253273
convenience instance methods L{DocumentParameters.load_document_file}
254274
and L{DocumentParameters.load_document_string}. The unit size and
255275
data format are defaulted to L{InputUnit.DOC} and L{DataFormat.SIMPLE}.
256-
257276
Using subscripts instead of instance variables facilitates diagnosis.
258-
259277
If the field C{contentUri} is set to the URL of a web page (only
260278
protocols C{http, https, ftp, ftps} are accepted), the server will
261279
fetch the content from that web page. In this case, neither C{content}
@@ -334,21 +352,13 @@ class NameTranslationParameters(_DocumentParamSetBase):
334352
All are optional except C{name} and C{targetLanguage}. Scripts are in
335353
ISO15924 codes, and languages in ISO639 (two- or three-letter) codes. See the Name Translation documentation for
336354
more description of these terms, as well as the content of the return result.
337-
338355
C{name} The name to be translated.
339-
340356
C{targetLanguage} The language into which the name is to be translated.
341-
342357
C{entityType} The entity type (TBD) of the name.
343-
344358
C{sourceLanguageOfOrigin} The language of origin of the name.
345-
346359
C{sourceLanguageOfUse} The language of use of the name.
347-
348360
C{sourceScript} The script in which the name is supplied.
349-
350361
C{targetScript} The script into which the name should be translated.
351-
352362
C{targetScheme} The transliteration scheme by which the translated name should be rendered.
353363
"""
354364

@@ -366,19 +376,12 @@ def validate(self):
366376
class NameMatchingParameters(_DocumentParamSetBase):
367377
"""Parameter object for C{matched_name} endpoint.
368378
All are required.
369-
370379
C{name1} The name to be matched, a C{name} object.
371-
372380
C{name2} The name to be matched, a C{name} object.
373-
374381
The C{name} object contains these fields:
375-
376382
C{text} Text of the name, required.
377-
378383
C{language} Language of the name in ISO639 three-letter code, optional.
379-
380384
C{script} The ISO15924 code of the name, optional.
381-
382385
C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional.
383386
"""
384387

@@ -399,11 +402,9 @@ class EndpointCaller:
399402
of the Rosette server, specified at its creation. Use the specific
400403
instance methods of the L{API} object to create L{EndpointCaller} objects bound to
401404
corresponding endpoints.
402-
403405
Use L{EndpointCaller.ping} to ping, and L{EndpointCaller.info} to retrieve server info.
404406
For all other types of requests, use L{EndpointCaller.call}, which accepts
405407
an argument specifying the data to be processed and certain metadata.
406-
407408
The results of all operations are returned as python dictionaries, whose
408409
keys and values correspond exactly to those of the corresponding
409410
JSON return value described in the Rosette web service documentation.
@@ -489,11 +490,9 @@ def call(self, parameters):
489490
endpoints except C{translated_name} and C{matched_name}, it must be a L{DocumentParameters}
490491
object; for C{translated_name}, it must be an L{NameTranslationParameters} object;
491492
for C{matched_name}, it must be an L{NameMatchingParameters} object.
492-
493493
In all cases, the result is returned as a python dictionary
494494
conforming to the JSON object described in the endpoint's entry
495495
in the Rosette web service documentation.
496-
497496
@param parameters: An object specifying the data,
498497
and possible metadata, to be processed by the endpoint. See the
499498
details for those object types.
@@ -525,7 +524,7 @@ class API:
525524
Call instance methods upon this object to obtain L{EndpointCaller} objects
526525
which can communicate with particular Rosette server endpoints.
527526
"""
528-
def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1', retries=1, debug=False):
527+
def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1', retries=3, reuse_connection=True, refresh_duration=86400, debug=False):
529528
""" Create an L{API} object.
530529
@param user_key: (Optional; required for servers requiring authentication.) An authentication string to be sent
531530
as user_key with all requests. The default Rosette server requires authentication.
@@ -540,10 +539,18 @@ def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1',
540539
self.debug = debug
541540
self.useMultipart = False
542541
self.version_checked = False
542+
543543
global N_RETRIES
544+
global REUSE_CONNECTION
545+
global CONNECTION_REFRESH_DURATION
546+
544547
if (retries < 1):
545548
retries = 1
549+
if refresh_duration < 60:
550+
refresh_duration = 60
546551
N_RETRIES = retries
552+
REUSE_CONNECTION = reuse_connection
553+
CONNECTION_REFRESH_DURATION = refresh_duration
547554

548555
def check_version(self):
549556
if self.version_checked:

0 commit comments

Comments
 (0)