2727import os
2828from socket import gethostbyname , gaierror
2929from datetime import datetime
30+ import requests
3031
31- _BINDING_VERSION = "0.8 "
32+ _BINDING_VERSION = "0.10 "
3233_GZIP_BYTEARRAY = bytearray ([0x1F , 0x8b , 0x08 ])
3334N_RETRIES = 3
3435HTTP_CONNECTION = None
@@ -229,24 +230,6 @@ def validate(cls, value, name):
229230 " is not one of " + ", " .join (values ) + "." , repr (value ))
230231
231232
232- class DataFormat (_PseudoEnum ):
233- """Data Format, as much as it is known."""
234- SIMPLE = "text/plain"
235- """The data is unstructured text, supplied as a possibly-unicode string."""
236- JSON = "application/json"
237- """To be supplied. The API uses JSON internally, but that is not what this refers to."""
238- HTML = "text/html"
239- """The data is a 'loose' HTML page; that is, it may not be HTML-compliant, or may even not
240- really be HTML. The data must be a narrow (single-byte) string, not a python Unicode string,
241- perhaps read from a file. (Of course, it can be UTF-8 encoded)."""
242- XHTML = "application/xhtml+xml"
243- """The data is a compliant XHTML page. The data must be a narrow (single-byte) string, not a
244- python Unicode string, perhaps read from a file. (Of course, it can be UTF-8 encoded)."""
245- UNSPECIFIED = "application/octet-stream"
246- """The data is of unknown format, it may be a binary data type (the contents of a binary file),
247- or may not. It will be sent as is and identified and analyzed by the server."""
248-
249-
250233class MorphologyOutput (_PseudoEnum ):
251234 LEMMAS = "lemmas"
252235 PARTS_OF_SPEECH = "parts-of-speech"
@@ -298,7 +281,7 @@ def _byteify(s): # py 3 only
298281class DocumentParameters (_DocumentParamSetBase ):
299282 """Parameter object for all operations requiring input other than
300283 translated_name.
301- Three fields, C{content}, C{contentType}, and C{inputUri}, are set via
284+ Two fields, C{content} and C{inputUri}, are set via
302285 the subscript operator, e.g., C{params["content"]}, or the
303286 convenience instance methods L{DocumentParameters.load_document_file}
304287 and L{DocumentParameters.load_document_string}.
@@ -307,13 +290,14 @@ class DocumentParameters(_DocumentParamSetBase):
307290
308291 If the field C{contentUri} is set to the URL of a web page (only
309292 protocols C{http, https, ftp, ftps} are accepted), the server will
310- fetch the content from that web page. In this case, neither C{content}
311- nor C{contentType} may be set.
293+ fetch the content from that web page. In this case, C{content} may not be set.
312294 """
313295
314296 def __init__ (self ):
315297 """Create a L{DocumentParameters} object."""
316- _DocumentParamSetBase .__init__ (self , ("content" , "contentUri" , "contentType" , "language" ))
298+ _DocumentParamSetBase .__init__ (self , ("content" , "contentUri" , "language" ))
299+ self .file_name = ""
300+ self .useMultipart = False
317301
318302 def validate (self ):
319303 """Internal. Do not use."""
@@ -328,46 +312,26 @@ def serialize(self):
328312 """Internal. Do not use."""
329313 self .validate ()
330314 slz = super (DocumentParameters , self ).serialize ()
331- if self ["contentType" ] is None and self ["contentUri" ] is None :
332- slz ["contentType" ] = DataFormat .SIMPLE
333- elif self ["contentType" ] in (DataFormat .HTML , DataFormat .XHTML , DataFormat .UNSPECIFIED ):
334- content = slz ["content" ]
335- if _IsPy3 and isinstance (content , str ):
336- content = _byteify (content )
337- encoded = content
338- if _IsPy3 :
339- encoded = encoded .decode ("utf-8" ) # if py3, need chars.
340- slz ["content" ] = encoded
341315 return slz
342316
343- def load_document_file (self , path , data_type = DataFormat . UNSPECIFIED ):
317+ def load_document_file (self , path ):
344318 """Loads a file into the object.
345319 The file will be read as bytes; the appropriate conversion will
346320 be determined by the server.
347321 @parameter path: Pathname of a file acceptable to the C{open} function.
348- @parameter data_type: One of L{DataFormat.HTML}, L{DataFormat.XHTML}, or L{DataFormat.UNSPECIFIED}.
349- No other types are acceptable at this time, although HTML is broad enough to include text strings
350- without markup.
351- If the data type is unknown, or describes a binary file, use the default (L{DataFormat.UNSPECIFIED}).
352- @type data_type: L{DataFormat}
353322 """
354- if data_type not in ( DataFormat . HTML , DataFormat . XHTML , DataFormat . UNSPECIFIED ):
355- raise RosetteException ( "badArgument" , "Must supply one of HTML, XHTML, or UNSPECIFIED" , data_type )
356- self .load_document_string (open (path , "rb" ).read (), data_type )
323+ self . useMultipart = True
324+ self . file_name = path
325+ self .load_document_string (open (path , "rb" ).read ())
357326
358- def load_document_string (self , s , data_type ):
327+ def load_document_string (self , s ):
359328 """Loads a string into the object.
360329 The string will be taken as bytes or as Unicode dependent upon
361- its native python type and the data type asked for; if the
362- type is HTML or XHTML, bytes, not python Unicode, are expected,
363- the encoding to be determined by the server.
330+ its native python type.
364331 @parameter s: A string, possibly a unicode-string, to be loaded
365- for subsequent analysis, as per the C{data_type}.
366- @parameter data_type: The data type of the string, as per L{DataFormat}.
367- @type data_type: L{DataFormat}
332+ for subsequent analysis.
368333 """
369334 self ["content" ] = s
370- self ["contentType" ] = data_type
371335
372336
373337class RelationshipsParameters (DocumentParameters ):
@@ -376,7 +340,8 @@ class RelationshipsParameters(DocumentParameters):
376340 to specify the relationships-unique options parameter."""
377341 def __init__ (self ):
378342 """Create a L{RelationshipsParameters} object."""
379- _DocumentParamSetBase .__init__ (self , ("content" , "contentUri" , "contentType" , "language" , "options" ))
343+ self .useMultipart = False
344+ _DocumentParamSetBase .__init__ (self , ("content" , "contentUri" , "language" , "options" ))
380345
381346
382347class NameTranslationParameters (_DocumentParamSetBase ):
@@ -405,6 +370,7 @@ class NameTranslationParameters(_DocumentParamSetBase):
405370 """
406371
407372 def __init__ (self ):
373+ self .useMultipart = False
408374 _DocumentParamSetBase .__init__ (self , ("name" , "targetLanguage" , "entityType" , "sourceLanguageOfOrigin" ,
409375 "sourceLanguageOfUse" , "sourceScript" , "targetScript" , "targetScheme" ))
410376
@@ -435,6 +401,7 @@ class NameSimilarityParameters(_DocumentParamSetBase):
435401 """
436402
437403 def __init__ (self ):
404+ self .useMultipart = False
438405 _DocumentParamSetBase .__init__ (self , ("name1" , "name2" ))
439406
440407 def validate (self ):
@@ -468,7 +435,7 @@ def __init__(self, api, suburl):
468435 self .service_url = api .service_url
469436 self .user_key = api .user_key
470437 self .logger = api .logger
471- self .useMultipart = api . useMultipart
438+ self .useMultipart = False
472439 self .checker = lambda : api .check_version ()
473440 self .suburl = suburl
474441 self .debug = api .debug
@@ -497,9 +464,6 @@ def __finish_result(self, r, ename):
497464 complaint_url + " : failed to communicate with Rosette" ,
498465 msg )
499466
500- def _set_use_multipart (self , value ):
501- self .useMultipart = value
502-
503467 def info (self ):
504468 """Issues an "info" request to the L{EndpointCaller}'s specific endpoint.
505469 @return: A dictionary telling server version and other
@@ -574,22 +538,32 @@ def call(self, parameters):
574538
575539 self .checker ()
576540
577- if self .useMultipart and (parameters ['contentType' ] != DataFormat .SIMPLE ):
578- raise RosetteException ("incompatible" , "Multipart requires contentType SIMPLE" ,
579- repr (parameters ['contentType' ]))
541+ self .useMultipart = parameters .useMultipart
580542 url = self .service_url + self .suburl
581- if self .debug :
582- url = add_query (url , "debug" , "true" )
583- self .logger .info ('operate: ' + url )
584543 params_to_serialize = parameters .serialize ()
585- headers = {'Accept' : "application/json" , 'Accept-Encoding' : "gzip" }
544+ headers = {}
586545 if self .user_key is not None :
587546 headers ["X-RosetteAPI-Key" ] = self .user_key
588- headers ['Content-Type' ] = "application/json"
589- r = _post_http (url , params_to_serialize , headers )
590- # pprint.pprint(headers)
591- # pprint.pprint(url)
592- # pprint.pprint(params_to_serialize)
547+ if self .useMultipart :
548+ params = dict ((key ,value ) for key , value in params_to_serialize .iteritems () if key == 'language' )
549+ files = {'content' : (os .path .basename (parameters .file_name ), params_to_serialize ["content" ], 'text/plain' ),
550+ 'request' : ('request_options' , json .dumps (params ), 'application/json' )}
551+ request = requests .Request ('POST' , url , files = files )
552+ prepared_request = request .prepare ()
553+ session = requests .Session ()
554+ resp = session .send (prepared_request )
555+ rdata = resp .content
556+ response_headers = {"responseHeaders" : dict (resp .headers )}
557+ status = resp .status_code
558+ r = _ReturnObject (_my_loads (rdata , response_headers ), status )
559+ else :
560+ if self .debug :
561+ url = add_query (url , "debug" , "true" )
562+ self .logger .info ('operate: ' + url )
563+ headers ['Accept' ] = "application/json"
564+ headers ['Accept-Encoding' ] = "gzip"
565+ headers ['Content-Type' ] = "application/json"
566+ r = _post_http (url , params_to_serialize , headers )
593567 return self .__finish_result (r , "operate" )
594568
595569
@@ -611,7 +585,6 @@ def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1/'
611585 self .logger = logging .getLogger ('rosette.api' )
612586 self .logger .info ('Initialized on ' + self .service_url )
613587 self .debug = debug
614- self .useMultipart = False
615588 self .version_checked = False
616589
617590 global N_RETRIES
@@ -638,9 +611,6 @@ def check_version(self):
638611 self .version_checked = True
639612 return True
640613
641- def _set_use_multipart (self , value ):
642- self .useMultipart = value
643-
644614 def ping (self ):
645615 """
646616 Create a ping L{EndpointCaller} for the server and ping it.
0 commit comments