22
33"""
44Python client for the Rosette API.
5+
56Copyright (c) 2014-2015 Basis Technology Corporation.
7+
68Licensed under the Apache License, Version 2.0 (the "License");
79you may not use this file except in compliance with the License.
810You may obtain a copy of the License at
911http://www.apache.org/licenses/LICENSE-2.0
12+
1013Unless required by applicable law or agreed to in writing, software
1114distributed under the License is distributed on an "AS IS" BASIS,
1215WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2124import logging
2225import sys
2326import pprint
27+ import time
28+ from socket import gethostbyname , gaierror
2429from datetime import datetime
2530
2631_ACCEPTABLE_SERVER_VERSION = "0.5"
3136CONNECTION_TYPE = ""
3237CONNECTION_START = datetime .now ()
3338CONNECTION_REFRESH_DURATION = 86400
34-
39+ N_RETRIES = 3
3540
3641_IsPy3 = sys .version_info [0 ] == 3
3742
@@ -96,30 +101,55 @@ def _retrying_request(op, url, data, headers):
96101 code = "unknownError"
97102 rdata = None
98103 for i in range (N_RETRIES + 1 ):
99- HTTP_CONNECTION .request (op , url , data , headers )
100- response = HTTP_CONNECTION .getresponse ()
101- status = response .status
102- rdata = response .read ()
103- if status < 500 :
104- if not REUSE_CONNECTION :
105- HTTP_CONNECTION .close ()
106- return rdata , status
107- if rdata is not None :
108- try :
109- the_json = _my_loads (rdata )
110- if "message" in the_json :
111- message = the_json ["message" ]
112- if "code" in the_json :
113- code = the_json ["code" ]
114- except :
115- pass
104+ # Try to connect with the Rosette API server
105+ # 500 errors will store a message and code
106+ try :
107+ HTTP_CONNECTION .request (op , url , data , headers )
108+ response = HTTP_CONNECTION .getresponse ()
109+ status = response .status
110+ rdata = response .read ()
111+ if status < 500 :
112+ if not REUSE_CONNECTION :
113+ HTTP_CONNECTION .close ()
114+ return rdata , status
115+ if rdata is not None :
116+ try :
117+ the_json = _my_loads (rdata )
118+ if "message" in the_json :
119+ message = the_json ["message" ]
120+ if "code" in the_json :
121+ code = the_json ["code" ]
122+ except :
123+ pass
124+ # If there are issues connecting to the API server,
125+ # try to regenerate the connection as long as there are
126+ # still retries left.
127+ # A short sleep delay occurs (similar to google reconnect)
128+ # in case the problem was a temporary one.
129+ except (httplib .BadStatusLine , gaierror ) as e :
130+ totalTime = CONNECTION_REFRESH_DURATION
131+ if i == N_RETRIES - 1 :
132+ raise RosetteException ("ConnectionError" , "Unable to establish connection to the Rosette API server" , url )
133+ else :
134+ if not REUSE_CONNECTION or HTTP_CONNECTION is None or totalTime >= CONNECTION_REFRESH_DURATION :
135+ time .sleep (min (5 * (i + 1 ) * (i + 1 ), 300 ))
136+ parsed = urlparse .urlparse (url )
137+ loc = parsed .netloc
138+ CONNECTION_TYPE = parsed .scheme
139+ CONNECTION_START = datetime .now ()
140+ if parsed .scheme == "https" :
141+ HTTP_CONNECTION = httplib .HTTPSConnection (loc )
142+ else :
143+ HTTP_CONNECTION = httplib .HTTPConnection (loc )
144+
116145 # Do not wait to retry -- the model is that a bunch of dynamically-routed
117146 # resources has failed -- Retry means some other set of servlets and their
118147 # underlings will be called up, and maybe they'll do better.
119148 # This will not help with a persistent or impassable delay situation,
120149 # but the former case is thought to be more likely.
150+
121151 if not REUSE_CONNECTION :
122- HTTP_CONECTION .close ()
152+ HTTP_CONNECTION .close ()
123153
124154 if message is None :
125155 message = "A retryable network operation has not succeeded after " + str (N_RETRIES ) + " attempts"
@@ -157,6 +187,7 @@ def add_query(orig_url, key, value):
157187
158188class RosetteException (Exception ):
159189 """Exception thrown by all Rosette API operations for errors local and remote.
190+
160191 TBD. Right now, the only valid operation is conversion to __str__.
161192 """
162193
@@ -273,7 +304,9 @@ class DocumentParameters(_DocumentParamSetBase):
273304 convenience instance methods L{DocumentParameters.load_document_file}
274305 and L{DocumentParameters.load_document_string}. The unit size and
275306 data format are defaulted to L{InputUnit.DOC} and L{DataFormat.SIMPLE}.
307+
276308 Using subscripts instead of instance variables facilitates diagnosis.
309+
277310 If the field C{contentUri} is set to the URL of a web page (only
278311 protocols C{http, https, ftp, ftps} are accepted), the server will
279312 fetch the content from that web page. In this case, neither C{content}
@@ -352,13 +385,21 @@ class NameTranslationParameters(_DocumentParamSetBase):
352385 All are optional except C{name} and C{targetLanguage}. Scripts are in
353386 ISO15924 codes, and languages in ISO639 (two- or three-letter) codes. See the Name Translation documentation for
354387 more description of these terms, as well as the content of the return result.
388+
355389 C{name} The name to be translated.
390+
356391 C{targetLanguage} The language into which the name is to be translated.
392+
357393 C{entityType} The entity type (TBD) of the name.
394+
358395 C{sourceLanguageOfOrigin} The language of origin of the name.
396+
359397 C{sourceLanguageOfUse} The language of use of the name.
398+
360399 C{sourceScript} The script in which the name is supplied.
400+
361401 C{targetScript} The script into which the name should be translated.
402+
362403 C{targetScheme} The transliteration scheme by which the translated name should be rendered.
363404 """
364405
@@ -376,12 +417,19 @@ def validate(self):
376417class NameMatchingParameters (_DocumentParamSetBase ):
377418 """Parameter object for C{matched_name} endpoint.
378419 All are required.
420+
379421 C{name1} The name to be matched, a C{name} object.
422+
380423 C{name2} The name to be matched, a C{name} object.
424+
381425 The C{name} object contains these fields:
426+
382427 C{text} Text of the name, required.
428+
383429 C{language} Language of the name in ISO639 three-letter code, optional.
430+
384431 C{script} The ISO15924 code of the name, optional.
432+
385433 C{entityType} The entity type, can be "PERSON", "LOCATION" or "ORGANIZATION", optional.
386434 """
387435
@@ -402,9 +450,11 @@ class EndpointCaller:
402450 of the Rosette server, specified at its creation. Use the specific
403451 instance methods of the L{API} object to create L{EndpointCaller} objects bound to
404452 corresponding endpoints.
453+
405454 Use L{EndpointCaller.ping} to ping, and L{EndpointCaller.info} to retrieve server info.
406455 For all other types of requests, use L{EndpointCaller.call}, which accepts
407456 an argument specifying the data to be processed and certain metadata.
457+
408458 The results of all operations are returned as python dictionaries, whose
409459 keys and values correspond exactly to those of the corresponding
410460 JSON return value described in the Rosette web service documentation.
@@ -490,9 +540,11 @@ def call(self, parameters):
490540 endpoints except C{translated_name} and C{matched_name}, it must be a L{DocumentParameters}
491541 object; for C{translated_name}, it must be an L{NameTranslationParameters} object;
492542 for C{matched_name}, it must be an L{NameMatchingParameters} object.
543+
493544 In all cases, the result is returned as a python dictionary
494545 conforming to the JSON object described in the endpoint's entry
495546 in the Rosette web service documentation.
547+
496548 @param parameters: An object specifying the data,
497549 and possible metadata, to be processed by the endpoint. See the
498550 details for those object types.
@@ -515,6 +567,9 @@ def call(self, parameters):
515567 headers ["user_key" ] = self .user_key
516568 headers ['Content-Type' ] = "application/json"
517569 r = _post_http (url , params_to_serialize , headers )
570+ # pprint.pprint(headers)
571+ # pprint.pprint(url)
572+ # pprint.pprint(params_to_serialize)
518573 return self .__finish_result (r , "operate" )
519574
520575
@@ -546,7 +601,7 @@ def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1',
546601
547602 if (retries < 1 ):
548603 retries = 1
549- if refresh_duration < 60 :
604+ if ( refresh_duration < 60 ) :
550605 refresh_duration = 60
551606 N_RETRIES = retries
552607 REUSE_CONNECTION = reuse_connection
0 commit comments