Merge pull request #2 from rosette-api/0.5.4

cp2boston · cp2boston · commit c4173d4dcf97 · 2015-08-28T14:57:23.000-04:00
Merge 0.5.4 to master
diff --git a/.gitignore b/.gitignore
@@ -1,11 +1,17 @@
 # General
 *~
-**/.DS_Store
+.DS_Store
+.DS_Store?
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
 
 # Jetbrains
 **/.idea/*
 !**/.idea/runConfigurations/
 *.iml
+target/
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/README.md b/README.md
@@ -0,0 +1,70 @@
+# This is the Python client binding for Rosette API.
+
+The Python binding requires Python 2.6 or greater and is available through pip:
+
+`pip install rosette_api`
+
+```python
+# 1. Set utf-8 encoding.
+# -*- coding: utf-8 -*-
+
+# 2. Imports from rosette.api.
+from rosette.api import API, DocumentParameters, MorphologyOutput
+
+# 3. Create API object.
+api = API("[your_api-key]")
+
+# 4. Create parameters object
+params = DocumentParameters()
+
+# 5. Set parameters.
+params["content"] = u"Was ist so böse an der Europäischen Zentralbank?"
+
+# 6. Make a call.
+result = api.morphology(params)
+
+# result is a Python dictionary that contains
+
+{u'lemmas': [{u'text': u'Was', u'lemma': u'was'}, {u'text': u'ist', u'lemma': u'sein'}, {u'text': u'so', u'lemma': u'so'}, {u'text': u'böse', u'lemma': u'böse'}, {u'text': u'an', u'lemma': u'an'}, {u'text': u'der', u'lemma': u'der'}, {u'text': u'Europäischen', u'lemma': u'europäisch'}, {u'text': u'Zentralbank', u'lemma': u'Zentralbank'}, {u'text': u'?', u'lemma': u'?'}]}
+```
+
+The samples use the following procedure:
+
+1. If the application reads text in, set encoding to utf-8 in the first line of the script.
+
+2. Import the names from `rosette.api` that your application needs. The `rosette.api` module provides:
+    * `API`
+    * `DocumentParameters`
+    * `NameMatchingParameters`
+    * `NameTranslationParameters`
+    * `MorphologyOutput`
+    * `DataFormat`
+    * `InputUnit`
+
+3. Create an `API` object with the `user_key` parameter.
+
+4. Create a parameters object for your request input:
+
+   | Parameter | Endpoint |
+   | ----|----|
+   | `NameMatchingParameters` | for `/matched-name` |
+   | `NameTranslationParameters` | for `/translated-name` |
+   | `DocumentParameters` | for all other endpoints |
+
+
+5. Set the parameters required for your operation: "`content`" or "`contentUri`" for `DocumentParameters`;
+"`name`" and "`targetLanguage`" for `NameTranslationParameters`; "`name1.text`" and "`name2.text`" for
+ `NameMatchingParameters`. Other parameters are optional.
+
+6. Call the `API` method for the endpoint you want to use. The methods are:
+    * `entities(linked)` where `linked` is `False` for entity extraction and `True` for entity linking.
+    * `categories()`
+    * `sentiment()`
+    * `language()`
+    * `morphology(tag)` where `tag` is a member of `MorphologyOutput`: `LEMMAS`, `PARTS_OF_SPEECH`, `COMPOUND_COMPONENTS`, `HAN_READINGS`, or `COMPLETE`. An empty tag is equivalent to `COMPLETE`.
+    * `sentences()`
+    * `tokens()`
+    * `matched_name()`
+    * `translated_name()`
+
+7. The API will return a dictionary with the results.
diff --git a/README.txt b/README.txt
diff --git a/examples/__init__.py b/examples/__init__.py
@@ -0,0 +1 @@
+__author__ = 'rhausmann'
diff --git a/examples/categories.py b/examples/categories.py
@@ -5,14 +5,14 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, DocumentParameters
 
 parser = argparse.ArgumentParser(description="Get the category of a piece of a document at a URL")
 parser.add_argument("--key", required=True, help="Rosette API key")
 parser.add_argument("--service_url", nargs="?", help="Optional user service URL")
-parser.add_argument("--url", nargs="?", default="http://www.basistech.com/about/", help="Optional URL for data")
+parser.add_argument("--url", nargs="?", default="https://en.wikipedia.org/wiki/Basis_Technology_Corp.", help="Optional URL for data")
 args = parser.parse_args()
 
 # Create an API instance
@@ -28,4 +28,4 @@
 
 result = api.categories(params)
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/entities.py b/examples/entities.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, DocumentParameters
 
@@ -21,9 +21,7 @@
     api = API(user_key=args.key)
 
 params = DocumentParameters()
-params["content"] = u"""President Obama urges the Congress and Speaker Boehner to pass the $50 billion spending bill
-based on Christian faith by July 1st or Washington will become totally dysfunctional,
-a terrible outcome for American people."""
+params["content"] = u"President Obama urges the Congress and Speaker Boehner to pass the $50 billion spending bill based on Christian faith by July 1st or Washington will become totally dysfunctional, a terrible outcome for American people."
 result = api.entities(params)  # entity linking is turned off
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/entities_linked.py b/examples/entities_linked.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, DocumentParameters
 
@@ -21,9 +21,7 @@
     api = API(user_key=args.key)
 
 params = DocumentParameters()
-params["content"] = u"""President Obama urges the Congress and Speaker Boehner to pass the $50 billion spending bill
-        based on Christian faith by July 1st or Washington will become totally dysfunctional,
-        a terrible outcome for American people."""
+params["content"] = u"President Obama urges the Congress and Speaker Boehner to pass the $50 billion spending bill based on Christian faith by July 1st or Washington will become totally dysfunctional, a terrible outcome for American people."
 result = api.entities(params, True)  # entity linking is turned on
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/info.py b/examples/info.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API
 
@@ -22,4 +22,4 @@
 
 result = api.info()
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/language.py b/examples/language.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, DocumentParameters
 
@@ -22,8 +22,7 @@
 
 params = DocumentParameters()
 
-# Use an HTML file to load data instead of a string
 params["content"] = u"Por favor Señorita, says the man."
 result = api.language(params)
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/matched-name.py b/examples/matched-name.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, NameMatchingParameters
 
@@ -25,4 +25,4 @@
 params["name2"] = {"text": "迈克尔·杰克逊", "entityType": "PERSON"}
 result = api.matched_name(params)
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/morphology_complete.py b/examples/morphology_complete.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, DocumentParameters
 
@@ -24,4 +24,4 @@
 params["content"] = u"The quick brown fox jumped over the lazy dog. Yes he did."
 result = api.morphology(params)
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/morphology_compound-components.py b/examples/morphology_compound-components.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, DocumentParameters, MorphologyOutput
 
@@ -24,4 +24,4 @@
 params["content"] = u"Rechtsschutzversicherungsgesellschaften"
 result = api.morphology(params, MorphologyOutput.COMPOUND_COMPONENTS)
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/morphology_han-readings.py b/examples/morphology_han-readings.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, DocumentParameters, MorphologyOutput
 
@@ -24,4 +24,4 @@
 params["content"] = u"北京大学生物系主任办公室内部会议"
 result = api.morphology(params, MorphologyOutput.HAN_READINGS)
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/morphology_lemmas.py b/examples/morphology_lemmas.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, DocumentParameters, MorphologyOutput
 
@@ -24,4 +24,4 @@
 params["content"] = u"The fact is that the geese just went back to get a rest and I'm not banking on their return soon"
 result = api.morphology(params, MorphologyOutput.LEMMAS)
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/morphology_parts-of-speech.py b/examples/morphology_parts-of-speech.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, DocumentParameters, MorphologyOutput
 
@@ -24,4 +24,4 @@
 params["content"] = u"The fact is that the geese just went back to get a rest and I'm not banking on their return soon"
 result = api.morphology(params, MorphologyOutput.PARTS_OF_SPEECH)
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/ping.py b/examples/ping.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API
 
@@ -22,4 +22,4 @@
 
 result = api.ping()
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/run_all.sh b/examples/run_all.sh
diff --git a/examples/sentences.py b/examples/sentences.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, DocumentParameters
 
@@ -21,18 +21,8 @@
     api = API(user_key=args.key)
 
 params = DocumentParameters()
-params["content"] = u"""
-This land is your land This land is my land
-From California to the New York island;
-From the red wood forest to the Gulf Stream waters
-
-This land was made for you and Me.
-
-As I was walking that ribbon of highway,
-I saw above me that endless skyway:
-I saw below me that golden valley:
-This land was made for you and me."""
+params["content"] = u"This land is your land. This land is my land\nFrom California to the New York island;\nFrom the red wood forest to the Gulf Stream waters\n\nThis land was made for you and Me.\n\nAs I was walking that ribbon of highway,\nI saw above me that endless skyway:\nI saw below me that golden valley:\nThis land was made for you and me."
 
 result = api.sentences(params)
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/sentiment.py b/examples/sentiment.py
@@ -5,17 +5,16 @@
 """
 
 import argparse
-import pprint
+import json
 import tempfile
+import os
 
 from rosette.api import API, DocumentParameters
 
 # Create default file to read from
-f = tempfile.NamedTemporaryFile(suffix=".html")
-message = """
-<html><head><title>Performance Report</title></head>
-<body><p>This article is clean, concise, and very easy to read.</p></body></html>
-"""
+# f = tempfile.NamedTemporaryFile(suffix=".html")
+f = open("testhtml.html", 'w')
+message = "<html><head><title>Performance Report</title></head><body><p>This article is clean, concise, and very easy to read.</p></body></html>"
 f.write(message)
 f.seek(0)
 
@@ -40,5 +39,6 @@
 
 # Clean up the file
 f.close()
+os.remove("testhtml.html")
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/tokens.py b/examples/tokens.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, DocumentParameters
 
@@ -24,4 +24,4 @@
 params["content"] = u"北京大学生物系主任办公室内部会议"
 result = api.tokens(params)
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/examples/translated-name.py b/examples/translated-name.py
@@ -5,7 +5,7 @@
 """
 
 import argparse
-import pprint
+import json
 
 from rosette.api import API, NameTranslationParameters
 
@@ -26,4 +26,4 @@
 params["targetLanguage"] = "eng"
 result = api.translated_name(params)
 
-pprint.pprint(result)
+print(json.dumps(result, indent=2, ensure_ascii=False).encode("utf8"))
diff --git a/rosette/api.py b/rosette/api.py
diff --git a/tests/mock-data/response/bad_info.json b/tests/mock-data/response/bad_info.json
diff --git a/tests/mock-data/response/bad_info.status b/tests/mock-data/response/bad_info.status
diff --git a/tests/mock-data/response/retry-fail.json b/tests/mock-data/response/retry-fail.json
diff --git a/tests/mock-data/response/retry-fail.status b/tests/mock-data/response/retry-fail.status
diff --git a/tests/test_rosette_api.py b/tests/test_rosette_api.py