22
33"""
44Python client for the Rosette API.
5-
65Copyright (c) 2014-2015 Basis Technology Corporation.
7-
86Licensed under the Apache License, Version 2.0 (the "License");
97you may not use this file except in compliance with the License.
108You may obtain a copy of the License at
119http://www.apache.org/licenses/LICENSE-2.0
12-
1310Unless required by applicable law or agreed to in writing, software
1411distributed under the License is distributed on an "AS IS" BASIS,
1512WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2421import logging
2522import sys
2623import pprint
24+ from datetime import datetime
2725
2826_ACCEPTABLE_SERVER_VERSION = "0.5"
2927_GZIP_BYTEARRAY = bytearray ([0x1F , 0x8b , 0x08 ])
30- N_RETRIES = 1
28+ N_RETRIES = 3
29+ HTTP_CONNECTION = None
30+ REUSE_CONNECTION = True
31+ CONNECTION_TYPE = ""
32+ CONNECTION_START = datetime .now ()
33+ CONNECTION_REFRESH_DURATION = 86400
3134
3235
3336_IsPy3 = sys .version_info [0 ] == 3
@@ -67,22 +70,39 @@ def _my_loads(obj):
6770
6871
6972def _retrying_request (op , url , data , headers ):
73+ global HTTP_CONNECTION
74+ global REUSE_CONNECTION
75+ global CONNECTION_TYPE
76+ global CONNECTION_START
77+ global CONNECTION_REFRESH_DURATION
78+
79+ timeDelta = datetime .now () - CONNECTION_START
80+ totalTime = timeDelta .days * 86400 + timeDelta .seconds
81+ parsed = urlparse .urlparse (url )
82+ if parsed .scheme != CONNECTION_TYPE :
83+ totalTime = CONNECTION_REFRESH_DURATION
84+
85+ if not REUSE_CONNECTION or HTTP_CONNECTION is None or totalTime >= CONNECTION_REFRESH_DURATION :
86+ parsed = urlparse .urlparse (url )
87+ loc = parsed .netloc
88+ CONNECTION_TYPE = parsed .scheme
89+ CONNECTION_START = datetime .now ()
90+ if parsed .scheme == "https" :
91+ HTTP_CONNECTION = httplib .HTTPSConnection (loc )
92+ else :
93+ HTTP_CONNECTION = httplib .HTTPConnection (loc )
94+
7095 message = None
7196 code = "unknownError"
72- parsed = urlparse .urlparse (url )
73- loc = parsed .netloc
74- if parsed .scheme == "https" :
75- conn = httplib .HTTPSConnection (loc )
76- else :
77- conn = httplib .HTTPConnection (loc )
7897 rdata = None
7998 for i in range (N_RETRIES + 1 ):
80- conn .request (op , url , data , headers )
81- response = conn .getresponse ()
99+ HTTP_CONNECTION .request (op , url , data , headers )
100+ response = HTTP_CONNECTION .getresponse ()
82101 status = response .status
83102 rdata = response .read ()
84103 if status < 500 :
85- conn .close ()
104+ if not REUSE_CONNECTION :
105+ HTTP_CONNECTION .close ()
86106 return rdata , status
87107 if rdata is not None :
88108 try :
@@ -93,12 +113,13 @@ def _retrying_request(op, url, data, headers):
93113 code = the_json ["code" ]
94114 except :
95115 pass
96- conn .close ()
97116 # Do not wait to retry -- the model is that a bunch of dynamically-routed
98117 # resources has failed -- Retry means some other set of servlets and their
99118 # underlings will be called up, and maybe they'll do better.
100119 # This will not help with a persistent or impassable delay situation,
101120 # but the former case is thought to be more likely.
121+ if not REUSE_CONNECTION :
122+ HTTP_CONECTION .close ()
102123
103124 if message is None :
104125 message = "A retryable network operation has not succeeded after " + str (N_RETRIES ) + " attempts"
@@ -136,7 +157,6 @@ def add_query(orig_url, key, value):
136157
137158class RosetteException (Exception ):
138159 """Exception thrown by all Rosette API operations for errors local and remote.
139-
140160 TBD. Right now, the only valid operation is conversion to __str__.
141161 """
142162
@@ -253,9 +273,7 @@ class DocumentParameters(_DocumentParamSetBase):
253273 convenience instance methods L{DocumentParameters.load_document_file}
254274 and L{DocumentParameters.load_document_string}. The unit size and
255275 data format are defaulted to L{InputUnit.DOC} and L{DataFormat.SIMPLE}.
256-
257276 Using subscripts instead of instance variables facilitates diagnosis.
258-
259277 If the field C{contentUri} is set to the URL of a web page (only
260278 protocols C{http, https, ftp, ftps} are accepted), the server will
261279 fetch the content from that web page. In this case, neither C{content}
@@ -334,21 +352,13 @@ class NameTranslationParameters(_DocumentParamSetBase):
334352 All are optional except C{name} and C{targetLanguage}. Scripts are in
335353 ISO15924 codes, and languages in ISO639 (two- or three-letter) codes. See the Name Translation documentation for
336354 more description of these terms, as well as the content of the return result.
337-
338355 C{name} The name to be translated.
339-
340356 C{targetLanguage} The language into which the name is to be translated.
341-
342357 C{entityType} The entity type (TBD) of the name.
343-
344358 C{sourceLanguageOfOrigin} The language of origin of the name.
345-
346359 C{sourceLanguageOfUse} The language of use of the name.
347-
348360 C{sourceScript} The script in which the name is supplied.
349-
350361 C{targetScript} The script into which the name should be translated.
351-
352362 C{targetScheme} The transliteration scheme by which the translated name should be rendered.
353363 """
354364
@@ -366,19 +376,12 @@ def validate(self):
366376class NameMatchingParameters (_DocumentParamSetBase ):
367377 """Parameter object for C{matched_name} endpoint.
368378 All are required.
369-
370379 C{name1} The name to be matched, a C{name} object.
371-
372380 C{name2} The name to be matched, a C{name} object.
373-
374381 The C{name} object contains these fields:
375-
376382 C{text} Text of the name, required.
377-
378383 C{language} Language of the name in ISO639 three-letter code, optional.
379-
380384 C{script} The ISO15924 code of the name, optional.
381-
382385 C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional.
383386 """
384387
@@ -399,11 +402,9 @@ class EndpointCaller:
399402 of the Rosette server, specified at its creation. Use the specific
400403 instance methods of the L{API} object to create L{EndpointCaller} objects bound to
401404 corresponding endpoints.
402-
403405 Use L{EndpointCaller.ping} to ping, and L{EndpointCaller.info} to retrieve server info.
404406 For all other types of requests, use L{EndpointCaller.call}, which accepts
405407 an argument specifying the data to be processed and certain metadata.
406-
407408 The results of all operations are returned as python dictionaries, whose
408409 keys and values correspond exactly to those of the corresponding
409410 JSON return value described in the Rosette web service documentation.
@@ -489,11 +490,9 @@ def call(self, parameters):
489490 endpoints except C{translated_name} and C{matched_name}, it must be a L{DocumentParameters}
490491 object; for C{translated_name}, it must be an L{NameTranslationParameters} object;
491492 for C{matched_name}, it must be an L{NameMatchingParameters} object.
492-
493493 In all cases, the result is returned as a python dictionary
494494 conforming to the JSON object described in the endpoint's entry
495495 in the Rosette web service documentation.
496-
497496 @param parameters: An object specifying the data,
498497 and possible metadata, to be processed by the endpoint. See the
499498 details for those object types.
@@ -525,7 +524,7 @@ class API:
525524 Call instance methods upon this object to obtain L{EndpointCaller} objects
526525 which can communicate with particular Rosette server endpoints.
527526 """
528- def __init__ (self , user_key = None , service_url = 'https://api.rosette.com/rest/v1' , retries = 1 , debug = False ):
527+ def __init__ (self , user_key = None , service_url = 'https://api.rosette.com/rest/v1' , retries = 3 , reuse_connection = True , refresh_duration = 86400 , debug = False ):
529528 """ Create an L{API} object.
530529 @param user_key: (Optional; required for servers requiring authentication.) An authentication string to be sent
531530 as user_key with all requests. The default Rosette server requires authentication.
@@ -540,10 +539,18 @@ def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1',
540539 self .debug = debug
541540 self .useMultipart = False
542541 self .version_checked = False
542+
543543 global N_RETRIES
544+ global REUSE_CONNECTION
545+ global CONNECTION_REFRESH_DURATION
546+
544547 if (retries < 1 ):
545548 retries = 1
549+ if refresh_duration < 60 :
550+ refresh_duration = 60
546551 N_RETRIES = retries
552+ REUSE_CONNECTION = reuse_connection
553+ CONNECTION_REFRESH_DURATION = refresh_duration
547554
548555 def check_version (self ):
549556 if self .version_checked :
0 commit comments