Skip to content

Commit d33e91b

Browse files
committed
Merge branch 'master' of https://github.com/samir-patel/python-sdk into samir-patel-master
2 parents 1b0b6ec + c5f74f0 commit d33e91b

File tree

4 files changed

+145
-1
lines changed

4 files changed

+145
-1
lines changed

examples/document_conversion_v1.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,83 @@
1313
with open(join(dirname(__file__), '../resources/example.html'), 'r') as document:
1414
config = {'conversion_target': DocumentConversionV1.NORMALIZED_HTML}
1515
print(document_conversion.convert_document(document=document, config=config, media_type='text/html')
16-
.content.decode('utf-8'))
16+
.content)
1717

1818
# Example with JSON
1919
with open(join(dirname(__file__), '../resources/example.html'), 'r') as document:
2020
config['conversion_target'] = DocumentConversionV1.ANSWER_UNITS
2121
print(json.dumps(document_conversion.convert_document(document=document, config=config), indent=2))
22+
23+
# Examples of index_document API
# Every example below reads the same sample HTML page from the resources folder.
example_path = join(dirname(__file__), '../resources/example.html')

print("########## Example of a dry run of index_document with only a document ##########")
with open(example_path, 'r') as document:
    config = {'retrieve_and_rank': {'dry_run': 'true'}}
    result = document_conversion.index_document(config=config, document=document)
    print(json.dumps(result, indent=2))

print("########## Example of a dry run of index_document with only metadata ##########")
# No file is opened here: only the metadata part is sent along with the config.
config = {'retrieve_and_rank': {'dry_run': 'true'}}
metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
result = document_conversion.index_document(config=config, metadata=metadata)
print(json.dumps(result, indent=2))

print("########## Example of a dry run of index_document with document and metadata ##########")
with open(example_path, 'r') as document:
    config = {'retrieve_and_rank': {'dry_run': 'true'}}
    metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
    result = document_conversion.index_document(config=config, document=document, metadata=metadata)
    print(json.dumps(result, indent=2))

print("########## Example of a dry run of index_document with document, metadata, and additional config for conversion ##########")
with open(example_path, 'r') as document:
    # The conversion settings sit next to the retrieve_and_rank settings in one config.
    conversion_settings = {'normalized_html': {'exclude_content': {"xpaths": ["//body/div"]}}}
    config = {
        'convert_document': conversion_settings,
        'retrieve_and_rank': {'dry_run': 'true'},
    }
    metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
    result = document_conversion.index_document(config=config, document=document, metadata=metadata)
    print(json.dumps(result, indent=2))

print("########## Example of index_document with document, metadata (A service instance id, SOLR cluster id, and "
      "a SOLR collection name must be provided from the Retrieve and Rank service in order to index) ##########")
with open(example_path, 'r') as document:
    # Replace the three placeholder values with real Retrieve and Rank identifiers.
    rank_settings = {
        'dry_run': 'false',
        'service_instance_id': 'YOUR RETRIEVE AND RANK SERVICE INSTANCE ID',
        'cluster_id': 'YOUR RETRIEVE AND RANK SERVICE SOLR CLUSTER ID',
        'search_collection': 'YOUR RETRIEVE AND RANK SERVICE SOLR SEARCH COLLECTION NAME',
    }
    config = {'retrieve_and_rank': rank_settings}
    metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
    result = document_conversion.index_document(config=config, document=document, metadata=metadata)
    print(json.dumps(result, indent=2))

resources/simple.html

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<html>
2+
<head>
3+
<title>Simple HTML Page</title>
4+
</head>
5+
<body>
6+
<h1>Chapter 1</h1>
7+
<p>The content of the first chapter.</p>
8+
</body>
9+
</html>
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# coding=utf-8
2+
import os
3+
import responses
4+
import watson_developer_cloud
5+
6+
7+
@responses.activate
def test_success():
    """Call convert_document and index_document against mocked endpoints.

    Each endpoint is registered with ``responses`` just before it is called,
    and the recorded call is checked for the requested URL and the returned
    body. Exactly two HTTP calls are expected overall.
    """
    resources_dir = os.path.join(os.path.dirname(__file__), '../resources')

    # ---- convert_document ----
    convert_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/convert_document'
    convert_response = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><html>'
        '<head><title>Simple HTML Page</title></head>'
        '<body><h1>Chapter 1</h1><p>The content of the first chapter.</p></body></html>'
    )
    document_conversion = watson_developer_cloud.DocumentConversionV1(
        username="username", password="password", version='2015-12-15')

    responses.add(
        responses.POST,
        convert_url,
        body=convert_response,
        status=200,
        content_type='application/json',
    )

    with open(os.path.join(resources_dir, 'simple.html'), 'r') as document:
        convert_config = {
            'conversion_target': watson_developer_cloud.DocumentConversionV1.NORMALIZED_HTML,
        }
        document_conversion.convert_document(
            document=document, config=convert_config, media_type='text/html')

    convert_call = responses.calls[0]
    assert convert_call.request.url == convert_url
    assert convert_call.response.text == convert_response

    # ---- index_document ----
    index_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/index_document'
    index_response = '{"status": "success"}'

    responses.add(
        responses.POST,
        index_url,
        body=index_response,
        status=200,
        content_type='application/json',
    )

    with open(os.path.join(resources_dir, 'example.html'), 'r') as document:
        rank_settings = {
            'dry_run': 'false',
            'service_instance_id': 'serviceInstanceId',
            'cluster_id': 'clusterId',
            'search_collection': 'searchCollectionName',
        }
        index_config = {'retrieve_and_rank': rank_settings}
        document_conversion.index_document(config=index_config, document=document)

    index_call = responses.calls[1]
    assert index_call.request.url == index_url
    assert index_call.response.text == index_response

    # Nothing beyond the two calls above may have gone over the wire.
    assert len(responses.calls) == 2

watson_developer_cloud/document_conversion_v1.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,16 @@ def convert_document(self, document, config, media_type=None):
4141
accept_json = config['conversion_target'] == DocumentConversionV1.ANSWER_UNITS
4242
return self.request(method='POST', url='/v1/convert_document', files=files, params=params,
4343
accept_json=accept_json)
44+
45+
def index_document(self, config, document=None, metadata=None, media_type=None):
    """
    POST a document and/or metadata to the ``/v1/index_document`` endpoint.

    The request is assembled as a list of multipart parts: the config is
    always sent first, followed by the document and the metadata when given.

    :param config: JSON-serializable configuration, sent as ``config.json``.
    :param document: optional file-like object to upload; its ``name``
        attribute supplies the filename (directory components are stripped).
    :param metadata: optional JSON-serializable metadata, sent as
        ``metadata.json``.
    :param media_type: optional content type attached to the document part;
        omitted from the part when falsy.
    :return: the result of ``self.request`` for the POST, which is invoked
        with ``accept_json=True``.
    :raises AssertionError: if both ``document`` and ``metadata`` are None.
    """
    if document is None and metadata is None:
        raise AssertionError('Missing required parameters: document or metadata. At least one of those is required.')
    params = {'version': self.version}
    files = [('config', ('config.json', json.dumps(config), 'application/json'))]
    # Identity checks, not ``!= None``: an object with a custom __eq__/__ne__
    # must not be able to masquerade as "not supplied" and get dropped.
    if document is not None:
        filename = os.path.basename(document.name)
        file_tuple = (filename, document, media_type) if media_type else (filename, document)
        files.append(('file', file_tuple))
    if metadata is not None:
        files.append(('metadata', ('metadata.json', json.dumps(metadata), 'application/json')))
    return self.request(method='POST', url='/v1/index_document', files=files, params=params, accept_json=True)

0 commit comments

Comments
 (0)