2727import os
2828from socket import gethostbyname , gaierror
2929from datetime import datetime
30+ import requests
31+ from pprint import pprint
3032
31- _BINDING_VERSION = "0.8 "
33+ _BINDING_VERSION = "0.10 "
3234_GZIP_BYTEARRAY = bytearray ([0x1F , 0x8b , 0x08 ])
3335N_RETRIES = 3
3436HTTP_CONNECTION = None
@@ -229,24 +231,6 @@ def validate(cls, value, name):
229231 " is not one of " + ", " .join (values ) + "." , repr (value ))
230232
231233
232- class DataFormat (_PseudoEnum ):
233- """Data Format, as much as it is known."""
234- SIMPLE = "text/plain"
235- """The data is unstructured text, supplied as a possibly-unicode string."""
236- JSON = "application/json"
237- """To be supplied. The API uses JSON internally, but that is not what this refers to."""
238- HTML = "text/html"
239- """The data is a 'loose' HTML page; that is, it may not be HTML-compliant, or may even not
240- really be HTML. The data must be a narrow (single-byte) string, not a python Unicode string,
241- perhaps read from a file. (Of course, it can be UTF-8 encoded)."""
242- XHTML = "application/xhtml+xml"
243- """The data is a compliant XHTML page. The data must be a narrow (single-byte) string, not a
244- python Unicode string, perhaps read from a file. (Of course, it can be UTF-8 encoded)."""
245- UNSPECIFIED = "application/octet-stream"
246- """The data is of unknown format, it may be a binary data type (the contents of a binary file),
247- or may not. It will be sent as is and identified and analyzed by the server."""
248-
249-
250234class MorphologyOutput (_PseudoEnum ):
251235 LEMMAS = "lemmas"
252236 PARTS_OF_SPEECH = "parts-of-speech"
@@ -298,7 +282,7 @@ def _byteify(s): # py 3 only
298282class DocumentParameters (_DocumentParamSetBase ):
299283 """Parameter object for all operations requiring input other than
300284 translated_name.
301- Three fields, C{content}, C{contentType}, and C{inputUri}, are set via
285+ Two fields, C{content} and C{inputUri}, are set via
302286 the subscript operator, e.g., C{params["content"]}, or the
303287 convenience instance methods L{DocumentParameters.load_document_file}
304288 and L{DocumentParameters.load_document_string}.
@@ -307,13 +291,14 @@ class DocumentParameters(_DocumentParamSetBase):
307291
308292 If the field C{contentUri} is set to the URL of a web page (only
309293 protocols C{http, https, ftp, ftps} are accepted), the server will
310- fetch the content from that web page. In this case, neither C{content}
311- nor C{contentType} may be set.
294+ fetch the content from that web page. In this case, C{content} may not be set.
312295 """
313296
314297 def __init__ (self ):
315298 """Create a L{DocumentParameters} object."""
316- _DocumentParamSetBase .__init__ (self , ("content" , "contentUri" , "contentType" , "language" ))
299+ _DocumentParamSetBase .__init__ (self , ("content" , "contentUri" , "language" ))
300+ self .file_name = ""
301+ self .useMultipart = False
317302
318303 def validate (self ):
319304 """Internal. Do not use."""
@@ -328,47 +313,26 @@ def serialize(self):
328313 """Internal. Do not use."""
329314 self .validate ()
330315 slz = super (DocumentParameters , self ).serialize ()
331- if self ["contentType" ] is None and self ["contentUri" ] is None :
332- slz ["contentType" ] = DataFormat .SIMPLE
333- elif self ["contentType" ] in (DataFormat .HTML , DataFormat .XHTML , DataFormat .UNSPECIFIED ):
334- content = slz ["content" ]
335- if _IsPy3 and isinstance (content , str ):
336- content = _byteify (content )
337-
338- encoded = base64 .b64encode (content )
339- if _IsPy3 :
340- encoded = encoded .decode ("utf-8" ) # if py3, need chars.
341- slz ["content" ] = encoded
342316 return slz
343317
344- def load_document_file (self , path , data_type = DataFormat . UNSPECIFIED ):
318+ def load_document_file (self , path ):
345319 """Loads a file into the object.
346320 The file will be read as bytes; the appropriate conversion will
347321 be determined by the server.
348322 @parameter path: Pathname of a file acceptable to the C{open} function.
349- @parameter data_type: One of L{DataFormat.HTML}, L{DataFormat.XHTML}, or L{DataFormat.UNSPECIFIED}.
350- No other types are acceptable at this time, although HTML is broad enough to include text strings
351- without markup.
352- If the data type is unknown, or describes a binary file, use the default (L{DataFormat.UNSPECIFIED}).
353- @type data_type: L{DataFormat}
354323 """
355- if data_type not in ( DataFormat . HTML , DataFormat . XHTML , DataFormat . UNSPECIFIED ):
356- raise RosetteException ( "badArgument" , "Must supply one of HTML, XHTML, or UNSPECIFIED" , data_type )
357- self .load_document_string (open (path , "rb" ).read (), data_type )
324+ self . useMultipart = True
325+ self . file_name = path
326+ self .load_document_string (open (path , "rb" ).read ())
358327
359- def load_document_string (self , s , data_type ):
328+ def load_document_string (self , s ):
360329 """Loads a string into the object.
361330 The string will be taken as bytes or as Unicode dependent upon
362- its native python type and the data type asked for; if the
363- type is HTML or XHTML, bytes, not python Unicode, are expected,
364- the encoding to be determined by the server.
331+ its native python type.
365332 @parameter s: A string, possibly a unicode-string, to be loaded
366- for subsequent analysis, as per the C{data_type}.
367- @parameter data_type: The data type of the string, as per L{DataFormat}.
368- @type data_type: L{DataFormat}
333+ for subsequent analysis.
369334 """
370335 self ["content" ] = s
371- self ["contentType" ] = data_type
372336
373337
374338class RelationshipsParameters (DocumentParameters ):
@@ -377,7 +341,8 @@ class RelationshipsParameters(DocumentParameters):
377341 to specify the relationships-unique options parameter."""
378342 def __init__ (self ):
379343 """Create a L{RelationshipsParameters} object."""
380- _DocumentParamSetBase .__init__ (self , ("content" , "contentUri" , "contentType" , "language" , "options" ))
344+ self .useMultipart = False
345+ _DocumentParamSetBase .__init__ (self , ("content" , "contentUri" , "language" , "options" ))
381346
382347
383348class NameTranslationParameters (_DocumentParamSetBase ):
@@ -406,6 +371,7 @@ class NameTranslationParameters(_DocumentParamSetBase):
406371 """
407372
408373 def __init__ (self ):
374+ self .useMultipart = False
409375 _DocumentParamSetBase .__init__ (self , ("name" , "targetLanguage" , "entityType" , "sourceLanguageOfOrigin" ,
410376 "sourceLanguageOfUse" , "sourceScript" , "targetScript" , "targetScheme" ))
411377
@@ -436,6 +402,7 @@ class NameSimilarityParameters(_DocumentParamSetBase):
436402 """
437403
438404 def __init__ (self ):
405+ self .useMultipart = False
439406 _DocumentParamSetBase .__init__ (self , ("name1" , "name2" ))
440407
441408 def validate (self ):
@@ -469,7 +436,7 @@ def __init__(self, api, suburl):
469436 self .service_url = api .service_url
470437 self .user_key = api .user_key
471438 self .logger = api .logger
472- self .useMultipart = api . useMultipart
439+ self .useMultipart = False
473440 self .checker = lambda : api .check_version ()
474441 self .suburl = suburl
475442 self .debug = api .debug
@@ -498,9 +465,6 @@ def __finish_result(self, r, ename):
498465 complaint_url + " : failed to communicate with Rosette" ,
499466 msg )
500467
501- def _set_use_multipart (self , value ):
502- self .useMultipart = value
503-
504468 def info (self ):
505469 """Issues an "info" request to the L{EndpointCaller}'s specific endpoint.
506470 @return: A dictionary telling server version and other
@@ -575,22 +539,33 @@ def call(self, parameters):
575539
576540 self .checker ()
577541
578- if self .useMultipart and (parameters ['contentType' ] != DataFormat .SIMPLE ):
579- raise RosetteException ("incompatible" , "Multipart requires contentType SIMPLE" ,
580- repr (parameters ['contentType' ]))
542+ self .useMultipart = parameters .useMultipart
581543 url = self .service_url + self .suburl
582- if self .debug :
583- url = add_query (url , "debug" , "true" )
584- self .logger .info ('operate: ' + url )
585544 params_to_serialize = parameters .serialize ()
586- headers = {'Accept' : "application/json" , 'Accept-Encoding' : "gzip" }
545+ headers = {}
587546 if self .user_key is not None :
588547 headers ["X-RosetteAPI-Key" ] = self .user_key
589- headers ['Content-Type' ] = "application/json"
590- r = _post_http (url , params_to_serialize , headers )
591- # pprint.pprint(headers)
592- # pprint.pprint(url)
593- # pprint.pprint(params_to_serialize)
548+ if self .useMultipart :
549+ headers = {'Content-Disposition' : 'attachment' }
550+ params = dict ((key ,value ) for key , value in params_to_serialize .iteritems () if key == 'language' )
551+ files = {'content' : (parameters .file_name , params_to_serialize ["content" ], 'text/plain' ),
552+ 'request' : ('request_options' , json .dumps (params ), 'application/json' )}
553+ request = requests .Request ('POST' , url , files = files , headers = headers , params = [])
554+ prepared_request = request .prepare ()
555+ session = requests .Session ()
556+ resp = session .send (prepared_request )
557+ rdata = resp .content
558+ response_headers = {"responseHeaders" : dict (resp .headers )}
559+ status = resp .status_code
560+ r = _ReturnObject (_my_loads (rdata , response_headers ), status )
561+ else :
562+ if self .debug :
563+ url = add_query (url , "debug" , "true" )
564+ self .logger .info ('operate: ' + url )
565+ headers ['Accept' ] = "application/json"
566+ headers ['Accept-Encoding' ] = "gzip"
567+ headers ['Content-Type' ] = "application/json"
568+ r = _post_http (url , params_to_serialize , headers )
594569 return self .__finish_result (r , "operate" )
595570
596571
@@ -612,7 +587,6 @@ def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1/'
612587 self .logger = logging .getLogger ('rosette.api' )
613588 self .logger .info ('Initialized on ' + self .service_url )
614589 self .debug = debug
615- self .useMultipart = False
616590 self .version_checked = False
617591
618592 global N_RETRIES
@@ -639,9 +613,6 @@ def check_version(self):
639613 self .version_checked = True
640614 return True
641615
642- def _set_use_multipart (self , value ):
643- self .useMultipart = value
644-
645616 def ping (self ):
646617 """
647618 Create a ping L{EndpointCaller} for the server and ping it.
0 commit comments