Skip to content

Commit f3f6ef4

Browse files
committed
Internal calls are now structured as multipart when handling files, as in Java's setupMultipartRequest.
1 parent ee96f60 commit f3f6ef4

File tree

3 files changed

+47
-75
lines changed

3 files changed

+47
-75
lines changed

docker/tox.ini

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ envlist = py26, py27, py33, py34
99

1010
[testenv]
1111
commands =
12-
py.test {toxinidir}/tests
12+
py.test {toxinidir}/tests -s
1313
deps =
1414
pytest
1515
pytest-pep8
1616
httpretty==0.8.10
17-
epydoc
17+
epydoc
18+
requests

examples/sentiment.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,4 @@ def run(key, altUrl='https://api.rosette.com/rest/v1/'):
4242
if __name__ == '__main__':
4343
args = parser.parse_args()
4444
result = run(args.key, args.url)
45-
print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8"))
45+
print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8"))

rosette/api.py

Lines changed: 43 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@
2727
import os
2828
from socket import gethostbyname, gaierror
2929
from datetime import datetime
30+
import requests
31+
from pprint import pprint
3032

31-
_BINDING_VERSION = "0.8"
33+
_BINDING_VERSION = "0.10"
3234
_GZIP_BYTEARRAY = bytearray([0x1F, 0x8b, 0x08])
3335
N_RETRIES = 3
3436
HTTP_CONNECTION = None
@@ -229,24 +231,6 @@ def validate(cls, value, name):
229231
" is not one of " + ", ".join(values) + ".", repr(value))
230232

231233

232-
class DataFormat(_PseudoEnum):
233-
"""Data Format, as much as it is known."""
234-
SIMPLE = "text/plain"
235-
"""The data is unstructured text, supplied as a possibly-unicode string."""
236-
JSON = "application/json"
237-
"""To be supplied. The API uses JSON internally, but that is not what this refers to."""
238-
HTML = "text/html"
239-
"""The data is a 'loose' HTML page; that is, it may not be HTML-compliant, or may even not
240-
really be HTML. The data must be a narrow (single-byte) string, not a python Unicode string,
241-
perhaps read from a file. (Of course, it can be UTF-8 encoded)."""
242-
XHTML = "application/xhtml+xml"
243-
"""The data is a compliant XHTML page. The data must be a narrow (single-byte) string, not a
244-
python Unicode string, perhaps read from a file. (Of course, it can be UTF-8 encoded)."""
245-
UNSPECIFIED = "application/octet-stream"
246-
"""The data is of unknown format, it may be a binary data type (the contents of a binary file),
247-
or may not. It will be sent as is and identified and analyzed by the server."""
248-
249-
250234
class MorphologyOutput(_PseudoEnum):
251235
LEMMAS = "lemmas"
252236
PARTS_OF_SPEECH = "parts-of-speech"
@@ -298,7 +282,7 @@ def _byteify(s): # py 3 only
298282
class DocumentParameters(_DocumentParamSetBase):
299283
"""Parameter object for all operations requiring input other than
300284
translated_name.
301-
Three fields, C{content}, C{contentType}, and C{inputUri}, are set via
285+
Two fields, C{content} and C{inputUri}, are set via
302286
the subscript operator, e.g., C{params["content"]}, or the
303287
convenience instance methods L{DocumentParameters.load_document_file}
304288
and L{DocumentParameters.load_document_string}.
@@ -307,13 +291,14 @@ class DocumentParameters(_DocumentParamSetBase):
307291
308292
If the field C{contentUri} is set to the URL of a web page (only
309293
protocols C{http, https, ftp, ftps} are accepted), the server will
310-
fetch the content from that web page. In this case, neither C{content}
311-
nor C{contentType} may be set.
294+
fetch the content from that web page. In this case, C{content} may not be set.
312295
"""
313296

314297
def __init__(self):
315298
"""Create a L{DocumentParameters} object."""
316-
_DocumentParamSetBase.__init__(self, ("content", "contentUri", "contentType", "language"))
299+
_DocumentParamSetBase.__init__(self, ("content", "contentUri", "language"))
300+
self.file_name = ""
301+
self.useMultipart = False
317302

318303
def validate(self):
319304
"""Internal. Do not use."""
@@ -328,47 +313,26 @@ def serialize(self):
328313
"""Internal. Do not use."""
329314
self.validate()
330315
slz = super(DocumentParameters, self).serialize()
331-
if self["contentType"] is None and self["contentUri"] is None:
332-
slz["contentType"] = DataFormat.SIMPLE
333-
elif self["contentType"] in (DataFormat.HTML, DataFormat.XHTML, DataFormat.UNSPECIFIED):
334-
content = slz["content"]
335-
if _IsPy3 and isinstance(content, str):
336-
content = _byteify(content)
337-
338-
encoded = base64.b64encode(content)
339-
if _IsPy3:
340-
encoded = encoded.decode("utf-8") # if py3, need chars.
341-
slz["content"] = encoded
342316
return slz
343317

344-
def load_document_file(self, path, data_type=DataFormat.UNSPECIFIED):
318+
def load_document_file(self, path):
345319
"""Loads a file into the object.
346320
The file will be read as bytes; the appropriate conversion will
347321
be determined by the server.
348322
@parameter path: Pathname of a file acceptable to the C{open} function.
349-
@parameter data_type: One of L{DataFormat.HTML}, L{DataFormat.XHTML}, or L{DataFormat.UNSPECIFIED}.
350-
No other types are acceptable at this time, although HTML is broad enough to include text strings
351-
without markup.
352-
If the data type is unknown, or describes a binary file, use the default (L{DataFormat.UNSPECIFIED}).
353-
@type data_type: L{DataFormat}
354323
"""
355-
if data_type not in (DataFormat.HTML, DataFormat.XHTML, DataFormat.UNSPECIFIED):
356-
raise RosetteException("badArgument", "Must supply one of HTML, XHTML, or UNSPECIFIED", data_type)
357-
self.load_document_string(open(path, "rb").read(), data_type)
324+
self.useMultipart = True
325+
self.file_name = path
326+
self.load_document_string(open(path, "rb").read())
358327

359-
def load_document_string(self, s, data_type):
328+
def load_document_string(self, s):
360329
"""Loads a string into the object.
361330
The string will be taken as bytes or as Unicode dependent upon
362-
its native python type and the data type asked for; if the
363-
type is HTML or XHTML, bytes, not python Unicode, are expected,
364-
the encoding to be determined by the server.
331+
its native python type.
365332
@parameter s: A string, possibly a unicode-string, to be loaded
366-
for subsequent analysis, as per the C{data_type}.
367-
@parameter data_type: The data type of the string, as per L{DataFormat}.
368-
@type data_type: L{DataFormat}
333+
for subsequent analysis.
369334
"""
370335
self["content"] = s
371-
self["contentType"] = data_type
372336

373337

374338
class RelationshipsParameters(DocumentParameters):
@@ -377,7 +341,8 @@ class RelationshipsParameters(DocumentParameters):
377341
to specify the relationships-unique options parameter."""
378342
def __init__(self):
379343
"""Create a L{RelationshipsParameters} object."""
380-
_DocumentParamSetBase.__init__(self, ("content", "contentUri", "contentType", "language", "options"))
344+
self.useMultipart = False
345+
_DocumentParamSetBase.__init__(self, ("content", "contentUri", "language", "options"))
381346

382347

383348
class NameTranslationParameters(_DocumentParamSetBase):
@@ -406,6 +371,7 @@ class NameTranslationParameters(_DocumentParamSetBase):
406371
"""
407372

408373
def __init__(self):
374+
self.useMultipart = False
409375
_DocumentParamSetBase.__init__(self, ("name", "targetLanguage", "entityType", "sourceLanguageOfOrigin",
410376
"sourceLanguageOfUse", "sourceScript", "targetScript", "targetScheme"))
411377

@@ -436,6 +402,7 @@ class NameSimilarityParameters(_DocumentParamSetBase):
436402
"""
437403

438404
def __init__(self):
405+
self.useMultipart = False
439406
_DocumentParamSetBase.__init__(self, ("name1", "name2"))
440407

441408
def validate(self):
@@ -469,7 +436,7 @@ def __init__(self, api, suburl):
469436
self.service_url = api.service_url
470437
self.user_key = api.user_key
471438
self.logger = api.logger
472-
self.useMultipart = api.useMultipart
439+
self.useMultipart = False
473440
self.checker = lambda: api.check_version()
474441
self.suburl = suburl
475442
self.debug = api.debug
@@ -498,9 +465,6 @@ def __finish_result(self, r, ename):
498465
complaint_url + " : failed to communicate with Rosette",
499466
msg)
500467

501-
def _set_use_multipart(self, value):
502-
self.useMultipart = value
503-
504468
def info(self):
505469
"""Issues an "info" request to the L{EndpointCaller}'s specific endpoint.
506470
@return: A dictionary telling server version and other
@@ -575,22 +539,33 @@ def call(self, parameters):
575539

576540
self.checker()
577541

578-
if self.useMultipart and (parameters['contentType'] != DataFormat.SIMPLE):
579-
raise RosetteException("incompatible", "Multipart requires contentType SIMPLE",
580-
repr(parameters['contentType']))
542+
self.useMultipart = parameters.useMultipart
581543
url = self.service_url + self.suburl
582-
if self.debug:
583-
url = add_query(url, "debug", "true")
584-
self.logger.info('operate: ' + url)
585544
params_to_serialize = parameters.serialize()
586-
headers = {'Accept': "application/json", 'Accept-Encoding': "gzip"}
545+
headers = {}
587546
if self.user_key is not None:
588547
headers["X-RosetteAPI-Key"] = self.user_key
589-
headers['Content-Type'] = "application/json"
590-
r = _post_http(url, params_to_serialize, headers)
591-
# pprint.pprint(headers)
592-
# pprint.pprint(url)
593-
# pprint.pprint(params_to_serialize)
548+
if self.useMultipart:
549+
headers = {'Content-Disposition': 'attachment'}
550+
params = dict((key,value) for key, value in params_to_serialize.iteritems() if key == 'language')
551+
files = {'content': (parameters.file_name, params_to_serialize["content"], 'text/plain'),
552+
'request': ('request_options', json.dumps(params), 'application/json')}
553+
request = requests.Request('POST', url, files=files, headers=headers, params=[])
554+
prepared_request = request.prepare()
555+
session = requests.Session()
556+
resp = session.send(prepared_request)
557+
rdata = resp.content
558+
response_headers = {"responseHeaders": dict(resp.headers)}
559+
status = resp.status_code
560+
r = _ReturnObject(_my_loads(rdata, response_headers), status)
561+
else:
562+
if self.debug:
563+
url = add_query(url, "debug", "true")
564+
self.logger.info('operate: ' + url)
565+
headers['Accept'] = "application/json"
566+
headers['Accept-Encoding'] = "gzip"
567+
headers['Content-Type'] = "application/json"
568+
r = _post_http(url, params_to_serialize, headers)
594569
return self.__finish_result(r, "operate")
595570

596571

@@ -612,7 +587,6 @@ def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1/'
612587
self.logger = logging.getLogger('rosette.api')
613588
self.logger.info('Initialized on ' + self.service_url)
614589
self.debug = debug
615-
self.useMultipart = False
616590
self.version_checked = False
617591

618592
global N_RETRIES
@@ -639,9 +613,6 @@ def check_version(self):
639613
self.version_checked = True
640614
return True
641615

642-
def _set_use_multipart(self, value):
643-
self.useMultipart = value
644-
645616
def ping(self):
646617
"""
647618
Create a ping L{EndpointCaller} for the server and ping it.

0 commit comments

Comments
 (0)