Skip to content

Commit 0ea9cb0

Browse files
author
Chris Park
committed
Merge branch 'fhasanaj-RCB-289' into develop
2 parents 7feed24 + 647516a commit 0ea9cb0

File tree

4 files changed

+45
-105
lines changed

4 files changed

+45
-105
lines changed

docker/tox.ini

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ envlist = py26, py27, py33, py34
99

1010
[testenv]
1111
commands =
12-
py.test {toxinidir}/tests
12+
py.test {toxinidir}/tests -s
1313
deps =
1414
pytest
1515
pytest-pep8
1616
httpretty==0.8.10
17-
epydoc
17+
epydoc
18+
requests

examples/base64_input.py

Lines changed: 0 additions & 31 deletions
This file was deleted.

examples/sentiment.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,4 @@ def run(key, altUrl='https://api.rosette.com/rest/v1/'):
4343
if __name__ == '__main__':
4444
args = parser.parse_args()
4545
result = run(args.key, args.url)
46-
print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8"))
46+
print(json.dumps(result, indent=2, ensure_ascii=False, sort_keys=True).encode("utf8"))

rosette/api.py

Lines changed: 41 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@
2727
import os
2828
from socket import gethostbyname, gaierror
2929
from datetime import datetime
30+
import requests
3031

31-
_BINDING_VERSION = "0.8"
32+
_BINDING_VERSION = "0.10"
3233
_GZIP_BYTEARRAY = bytearray([0x1F, 0x8b, 0x08])
3334
N_RETRIES = 3
3435
HTTP_CONNECTION = None
@@ -229,24 +230,6 @@ def validate(cls, value, name):
229230
" is not one of " + ", ".join(values) + ".", repr(value))
230231

231232

232-
class DataFormat(_PseudoEnum):
233-
"""Data Format, as much as it is known."""
234-
SIMPLE = "text/plain"
235-
"""The data is unstructured text, supplied as a possibly-unicode string."""
236-
JSON = "application/json"
237-
"""To be supplied. The API uses JSON internally, but that is not what this refers to."""
238-
HTML = "text/html"
239-
"""The data is a 'loose' HTML page; that is, it may not be HTML-compliant, or may even not
240-
really be HTML. The data must be a narrow (single-byte) string, not a python Unicode string,
241-
perhaps read from a file. (Of course, it can be UTF-8 encoded)."""
242-
XHTML = "application/xhtml+xml"
243-
"""The data is a compliant XHTML page. The data must be a narrow (single-byte) string, not a
244-
python Unicode string, perhaps read from a file. (Of course, it can be UTF-8 encoded)."""
245-
UNSPECIFIED = "application/octet-stream"
246-
"""The data is of unknown format, it may be a binary data type (the contents of a binary file),
247-
or may not. It will be sent as is and identified and analyzed by the server."""
248-
249-
250233
class MorphologyOutput(_PseudoEnum):
251234
LEMMAS = "lemmas"
252235
PARTS_OF_SPEECH = "parts-of-speech"
@@ -298,7 +281,7 @@ def _byteify(s): # py 3 only
298281
class DocumentParameters(_DocumentParamSetBase):
299282
"""Parameter object for all operations requiring input other than
300283
translated_name.
301-
Three fields, C{content}, C{contentType}, and C{inputUri}, are set via
284+
Two fields, C{content} and C{inputUri}, are set via
302285
the subscript operator, e.g., C{params["content"]}, or the
303286
convenience instance methods L{DocumentParameters.load_document_file}
304287
and L{DocumentParameters.load_document_string}.
@@ -307,13 +290,14 @@ class DocumentParameters(_DocumentParamSetBase):
307290
308291
If the field C{contentUri} is set to the URL of a web page (only
309292
protocols C{http, https, ftp, ftps} are accepted), the server will
310-
fetch the content from that web page. In this case, neither C{content}
311-
nor C{contentType} may be set.
293+
fetch the content from that web page. In this case, C{content} may not be set.
312294
"""
313295

314296
def __init__(self):
315297
"""Create a L{DocumentParameters} object."""
316-
_DocumentParamSetBase.__init__(self, ("content", "contentUri", "contentType", "language"))
298+
_DocumentParamSetBase.__init__(self, ("content", "contentUri", "language"))
299+
self.file_name = ""
300+
self.useMultipart = False
317301

318302
def validate(self):
319303
"""Internal. Do not use."""
@@ -328,46 +312,26 @@ def serialize(self):
328312
"""Internal. Do not use."""
329313
self.validate()
330314
slz = super(DocumentParameters, self).serialize()
331-
if self["contentType"] is None and self["contentUri"] is None:
332-
slz["contentType"] = DataFormat.SIMPLE
333-
elif self["contentType"] in (DataFormat.HTML, DataFormat.XHTML, DataFormat.UNSPECIFIED):
334-
content = slz["content"]
335-
if _IsPy3 and isinstance(content, str):
336-
content = _byteify(content)
337-
encoded = content
338-
if _IsPy3:
339-
encoded = encoded.decode("utf-8") # if py3, need chars.
340-
slz["content"] = encoded
341315
return slz
342316

343-
def load_document_file(self, path, data_type=DataFormat.UNSPECIFIED):
317+
def load_document_file(self, path):
344318
"""Loads a file into the object.
345319
The file will be read as bytes; the appropriate conversion will
346320
be determined by the server.
347321
@parameter path: Pathname of a file acceptable to the C{open} function.
348-
@parameter data_type: One of L{DataFormat.HTML}, L{DataFormat.XHTML}, or L{DataFormat.UNSPECIFIED}.
349-
No other types are acceptable at this time, although HTML is broad enough to include text strings
350-
without markup.
351-
If the data type is unknown, or describes a binary file, use the default (L{DataFormat.UNSPECIFIED}).
352-
@type data_type: L{DataFormat}
353322
"""
354-
if data_type not in (DataFormat.HTML, DataFormat.XHTML, DataFormat.UNSPECIFIED):
355-
raise RosetteException("badArgument", "Must supply one of HTML, XHTML, or UNSPECIFIED", data_type)
356-
self.load_document_string(open(path, "rb").read(), data_type)
323+
self.useMultipart = True
324+
self.file_name = path
325+
self.load_document_string(open(path, "rb").read())
357326

358-
def load_document_string(self, s, data_type):
327+
def load_document_string(self, s):
359328
"""Loads a string into the object.
360329
The string will be taken as bytes or as Unicode dependent upon
361-
its native python type and the data type asked for; if the
362-
type is HTML or XHTML, bytes, not python Unicode, are expected,
363-
the encoding to be determined by the server.
330+
its native python type.
364331
@parameter s: A string, possibly a unicode-string, to be loaded
365-
for subsequent analysis, as per the C{data_type}.
366-
@parameter data_type: The data type of the string, as per L{DataFormat}.
367-
@type data_type: L{DataFormat}
332+
for subsequent analysis.
368333
"""
369334
self["content"] = s
370-
self["contentType"] = data_type
371335

372336

373337
class RelationshipsParameters(DocumentParameters):
@@ -376,7 +340,8 @@ class RelationshipsParameters(DocumentParameters):
376340
to specify the relationships-unique options parameter."""
377341
def __init__(self):
378342
"""Create a L{RelationshipsParameters} object."""
379-
_DocumentParamSetBase.__init__(self, ("content", "contentUri", "contentType", "language", "options"))
343+
self.useMultipart = False
344+
_DocumentParamSetBase.__init__(self, ("content", "contentUri", "language", "options"))
380345

381346

382347
class NameTranslationParameters(_DocumentParamSetBase):
@@ -405,6 +370,7 @@ class NameTranslationParameters(_DocumentParamSetBase):
405370
"""
406371

407372
def __init__(self):
373+
self.useMultipart = False
408374
_DocumentParamSetBase.__init__(self, ("name", "targetLanguage", "entityType", "sourceLanguageOfOrigin",
409375
"sourceLanguageOfUse", "sourceScript", "targetScript", "targetScheme"))
410376

@@ -435,6 +401,7 @@ class NameSimilarityParameters(_DocumentParamSetBase):
435401
"""
436402

437403
def __init__(self):
404+
self.useMultipart = False
438405
_DocumentParamSetBase.__init__(self, ("name1", "name2"))
439406

440407
def validate(self):
@@ -468,7 +435,7 @@ def __init__(self, api, suburl):
468435
self.service_url = api.service_url
469436
self.user_key = api.user_key
470437
self.logger = api.logger
471-
self.useMultipart = api.useMultipart
438+
self.useMultipart = False
472439
self.checker = lambda: api.check_version()
473440
self.suburl = suburl
474441
self.debug = api.debug
@@ -497,9 +464,6 @@ def __finish_result(self, r, ename):
497464
complaint_url + " : failed to communicate with Rosette",
498465
msg)
499466

500-
def _set_use_multipart(self, value):
501-
self.useMultipart = value
502-
503467
def info(self):
504468
"""Issues an "info" request to the L{EndpointCaller}'s specific endpoint.
505469
@return: A dictionary telling server version and other
@@ -574,22 +538,32 @@ def call(self, parameters):
574538

575539
self.checker()
576540

577-
if self.useMultipart and (parameters['contentType'] != DataFormat.SIMPLE):
578-
raise RosetteException("incompatible", "Multipart requires contentType SIMPLE",
579-
repr(parameters['contentType']))
541+
self.useMultipart = parameters.useMultipart
580542
url = self.service_url + self.suburl
581-
if self.debug:
582-
url = add_query(url, "debug", "true")
583-
self.logger.info('operate: ' + url)
584543
params_to_serialize = parameters.serialize()
585-
headers = {'Accept': "application/json", 'Accept-Encoding': "gzip"}
544+
headers = {}
586545
if self.user_key is not None:
587546
headers["X-RosetteAPI-Key"] = self.user_key
588-
headers['Content-Type'] = "application/json"
589-
r = _post_http(url, params_to_serialize, headers)
590-
# pprint.pprint(headers)
591-
# pprint.pprint(url)
592-
# pprint.pprint(params_to_serialize)
547+
if self.useMultipart:
548+
params = dict((key,value) for key, value in params_to_serialize.iteritems() if key == 'language')
549+
files = {'content': (os.path.basename(parameters.file_name), params_to_serialize["content"], 'text/plain'),
550+
'request': ('request_options', json.dumps(params), 'application/json')}
551+
request = requests.Request('POST', url, files=files)
552+
prepared_request = request.prepare()
553+
session = requests.Session()
554+
resp = session.send(prepared_request)
555+
rdata = resp.content
556+
response_headers = {"responseHeaders": dict(resp.headers)}
557+
status = resp.status_code
558+
r = _ReturnObject(_my_loads(rdata, response_headers), status)
559+
else:
560+
if self.debug:
561+
url = add_query(url, "debug", "true")
562+
self.logger.info('operate: ' + url)
563+
headers['Accept'] = "application/json"
564+
headers['Accept-Encoding'] = "gzip"
565+
headers['Content-Type'] = "application/json"
566+
r = _post_http(url, params_to_serialize, headers)
593567
return self.__finish_result(r, "operate")
594568

595569

@@ -611,7 +585,6 @@ def __init__(self, user_key=None, service_url='https://api.rosette.com/rest/v1/'
611585
self.logger = logging.getLogger('rosette.api')
612586
self.logger.info('Initialized on ' + self.service_url)
613587
self.debug = debug
614-
self.useMultipart = False
615588
self.version_checked = False
616589

617590
global N_RETRIES
@@ -638,9 +611,6 @@ def check_version(self):
638611
self.version_checked = True
639612
return True
640613

641-
def _set_use_multipart(self, value):
642-
self.useMultipart = value
643-
644614
def ping(self):
645615
"""
646616
Create a ping L{EndpointCaller} for the server and ping it.

0 commit comments

Comments
 (0)