Merge branch 'samir-patel-master'

jsstylos · jsstylos · commit b565f5d25aae · 2016-07-06T13:19:56.000-04:00
diff --git a/examples/document_conversion_v1.py b/examples/document_conversion_v1.py
@@ -13,9 +13,83 @@
 with open(join(dirname(__file__), '../resources/example.html'), 'r') as document:
     config = {'conversion_target': DocumentConversionV1.NORMALIZED_HTML}
     print(document_conversion.convert_document(document=document, config=config, media_type='text/html')
-          .content.decode('utf-8'))
+          .content)
 
 # Example with JSON
 with open(join(dirname(__file__), '../resources/example.html'), 'r') as document:
     config['conversion_target'] = DocumentConversionV1.ANSWER_UNITS
     print(json.dumps(document_conversion.convert_document(document=document, config=config), indent=2))
+
+# Examples of index_document API
+print("########## Example of a dry run of index_document with only a document ##########")
+with open(join(dirname(__file__), '../resources/example.html'), 'r') as document:
+    config = {
+        'retrieve_and_rank': {
+            'dry_run': 'true'
+        }
+    }
+    print(json.dumps(document_conversion.index_document(config=config, document=document), indent=2))
+
+print("########## Example of a dry run of index_document with only metadata ##########")
+config = {
+    'retrieve_and_rank': {
+        'dry_run': 'true'
+    }
+}
+metadata = {
+    'metadata': [
+        {'name': 'id', 'value': '12345'}
+    ]
+}
+print(json.dumps(document_conversion.index_document(config=config, metadata=metadata), indent=2))
+
+print("########## Example of a dry run of index_document with document and metadata ##########")
+with open(join(dirname(__file__), '../resources/example.html'), 'r') as document:
+    config = {
+        'retrieve_and_rank': {
+            'dry_run': 'true'
+        }
+    }
+    metadata = {
+        'metadata': [
+            {'name': 'id', 'value': '12345'}
+        ]
+    }
+    print(json.dumps(document_conversion.index_document(config=config, document=document, metadata=metadata), indent=2))
+
+print("########## Example of a dry run of index_document with document, metadata, and additional config for conversion ##########")
+with open(join(dirname(__file__), '../resources/example.html'), 'r') as document:
+    config = {
+        'convert_document': {
+            'normalized_html': {
+                'exclude_content': {"xpaths": ["//body/div"]}
+            }
+        },
+        'retrieve_and_rank': {
+            'dry_run': 'true'
+        }
+    }
+    metadata = {
+        'metadata': [
+            {'name': 'id', 'value': '12345'}
+        ]
+    }
+    print(json.dumps(document_conversion.index_document(config=config, document=document, metadata=metadata), indent=2))
+
+# print("########## Example of index_document with document, metadata (A service instance id, SOLR cluster id, and "
+#       "a SOLR collection name must be provided from the Retrieve and Rank service in order to index) ##########")
+# with open(join(dirname(__file__), '../resources/example.html'), 'r') as document:
+#     config = {
+#         'retrieve_and_rank': {
+#             'dry_run': 'false',
+#             'service_instance_id': 'YOUR RETRIEVE AND RANK SERVICE INSTANCE ID',
+#             'cluster_id': 'YOUR RETRIEVE AND RANK SERVICE SOLR CLUSTER ID',
+#             'search_collection': 'YOUR RETRIEVE AND RANK SERVICE SOLR SEARCH COLLECTION NAME'
+#         }
+#     }
+#     metadata = {
+#         'metadata': [
+#             {'name': 'id', 'value': '12345'}
+#         ]
+#     }
+#     print(json.dumps(document_conversion.index_document(config=config, document=document, metadata=metadata), indent=2))
diff --git a/resources/simple.html b/resources/simple.html
@@ -0,0 +1,9 @@
+<html>
+<head>
+  <title>Simple HTML Page</title>
+</head>
+<body>
+  <h1>Chapter 1</h1>
+  <p>The content of the first chapter.</p>
+</body>
+</html>
diff --git a/test/test_document_conversion_v1.py b/test/test_document_conversion_v1.py
@@ -0,0 +1,57 @@
+# coding=utf-8
+import os
+import responses
+import watson_developer_cloud
+
+
+@responses.activate
+def test_success():
+    """Check convert_document and index_document against stubbed endpoints.
+
+    Both service URLs are registered with the ``responses`` library. For each
+    call the test asserts that the request went to the expected URL and that the
+    recorded response text equals the stubbed body; finally it asserts that
+    exactly two HTTP calls were recorded.
+    """
+    convert_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/convert_document'
+    convert_response = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><html>' \
+                       '<head><title>Simple HTML Page</title></head>' \
+                       '<body><h1>Chapter 1</h1><p>The content of the first chapter.</p></body></html>'
+    document_conversion = watson_developer_cloud.DocumentConversionV1(
+        username="username", password="password", version='2015-12-15')
+
+    # The stubbed body is an HTML document, so advertise it as such rather than as JSON.
+    responses.add(responses.POST, convert_url,
+                  body=convert_response, status=200,
+                  content_type='text/html')
+
+    with open(os.path.join(os.path.dirname(__file__), '../resources/simple.html'), 'r') as document:
+        convert_config = {'conversion_target': watson_developer_cloud.DocumentConversionV1.NORMALIZED_HTML}
+        document_conversion.convert_document(document=document, config=convert_config, media_type='text/html')
+
+    # startswith(): the recorded URL may carry a query string after the endpoint path.
+    assert responses.calls[0].request.url.startswith(convert_url)
+    assert responses.calls[0].response.text == convert_response
+
+    index_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/index_document'
+    index_response = '{"status": "success"}'
+
+    responses.add(responses.POST, index_url,
+                  body=index_response, status=200,
+                  content_type='application/json')
+
+    with open(os.path.join(os.path.dirname(__file__), '../resources/example.html'), 'r') as document:
+        index_config = {
+            'retrieve_and_rank': {
+                'dry_run': 'false',
+                'service_instance_id': 'serviceInstanceId',
+                'cluster_id': 'clusterId',
+                'search_collection': 'searchCollectionName'
+            }
+        }
+        document_conversion.index_document(config=index_config, document=document)
+
+    assert responses.calls[1].request.url.startswith(index_url)
+    assert responses.calls[1].response.text == index_response
+
+    assert len(responses.calls) == 2
diff --git a/watson_developer_cloud/document_conversion_v1.py b/watson_developer_cloud/document_conversion_v1.py
@@ -41,3 +41,32 @@ def convert_document(self, document, config, media_type=None):
         accept_json = config['conversion_target'] == DocumentConversionV1.ANSWER_UNITS
         return self.request(method='POST', url='/v1/convert_document', files=files, params=params,
                             accept_json=accept_json)
+
+    def index_document(self, config, document=None, metadata=None, media_type=None):
+        """
+        Send a document and/or its metadata to the ``/v1/index_document`` endpoint.
+
+        The upload parts are handed to ``self.request`` through ``files``: ``config`` is
+        always attached as ``config.json``; the document and the metadata only when given.
+
+        :param config: JSON-serializable indexing configuration (serialized with ``json.dumps``).
+        :param document: optional open file-like object; it must expose a ``name``
+            attribute, whose base name is used as the uploaded file name.
+        :param metadata: optional JSON-serializable metadata, sent as ``metadata.json``.
+        :param media_type: optional content type for the document part; when falsy the
+            part is sent without an explicit content type.
+        :return: whatever ``self.request`` returns for the call (made with ``accept_json=True``).
+        :raises AssertionError: if both ``document`` and ``metadata`` are ``None``.
+        """
+        if document is None and metadata is None:
+            raise AssertionError('Missing required parameters: document or metadata. At least one of those is required.')
+        params = {'version': self.version}
+        files = [('config', ('config.json', json.dumps(config), 'application/json'))]
+        # Compare by identity: ``!= None`` would defer to a custom ``__ne__``/``__eq__``.
+        if document is not None:
+            filename = os.path.basename(document.name)
+            file_tuple = (filename, document, media_type) if media_type else (filename, document)
+            files.append(('file', file_tuple))
+        if metadata is not None:
+            files.append(('metadata', ('metadata.json', json.dumps(metadata), 'application/json')))
+        return self.request(method='POST', url='/v1/index_document', files=files, params=params, accept_json=True)