Skip to content

Commit e780126

Browse files
committed
[document-conversion] Adds index document API
This commit adds the index_document API for the Document Conversion service to the Python SDK. It also includes updated examples that show how to use the index_document API.
1 parent 80ca9d0 commit e780126

File tree

2 files changed

+88
-1
lines changed

2 files changed

+88
-1
lines changed

examples/document_conversion_v1.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,83 @@
1313
with open(join(dirname(__file__), '../resources/example.html'), 'r') as document:
1414
config = {'conversion_target': DocumentConversionV1.NORMALIZED_HTML}
1515
print(document_conversion.convert_document(document=document, config=config, media_type='text/html')
16-
.content.decode('utf-8'))
16+
.content)
1717

1818
# Example with JSON
1919
with open(join(dirname(__file__), '../resources/example.html'), 'r') as document:
2020
config['conversion_target'] = DocumentConversionV1.ANSWER_UNITS
2121
print(json.dumps(document_conversion.convert_document(document=document, config=config), indent=2))
22+
23+
# Examples of index_document API
#
# Each example prints a banner, builds the request config (and metadata where
# used), calls index_document, and pretty-prints the response as JSON.
example_html_path = join(dirname(__file__), '../resources/example.html')

# --- Dry run: document only ---
print("########## Example of a dry run of index_document with only a document ##########")
config = {'retrieve_and_rank': {'dry_run': 'true'}}
with open(example_html_path, 'r') as document:
    response = document_conversion.index_document(config=config, document=document)
    print(json.dumps(response, indent=2))

# --- Dry run: metadata only (no file is opened) ---
print("########## Example of a dry run of index_document with only metadata ##########")
config = {'retrieve_and_rank': {'dry_run': 'true'}}
metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
response = document_conversion.index_document(config=config, metadata=metadata)
print(json.dumps(response, indent=2))

# --- Dry run: document plus metadata ---
print("########## Example of a dry run of index_document with document and metadata ##########")
config = {'retrieve_and_rank': {'dry_run': 'true'}}
metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
with open(example_html_path, 'r') as document:
    response = document_conversion.index_document(config=config, document=document, metadata=metadata)
    print(json.dumps(response, indent=2))

# --- Dry run: document, metadata, and extra conversion settings ---
print("########## Example of a dry run of index_document with document, metadata, and additional config for conversion ##########")
config = {
    'convert_document': {
        'normalized_html': {
            'exclude_content': {"xpaths": ["//body/div"]},
        },
    },
    'retrieve_and_rank': {'dry_run': 'true'},
}
metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
with open(example_html_path, 'r') as document:
    response = document_conversion.index_document(config=config, document=document, metadata=metadata)
    print(json.dumps(response, indent=2))

# --- Non-dry-run call: the placeholder values below must be replaced ---
print("########## Example of index_document with document, metadata (A service instance id, SOLR cluster id, and "
      "a SOLR collection name must be provided from the Retrieve and Rank service in order to index) ##########")
config = {
    'retrieve_and_rank': {
        'dry_run': 'false',
        'service_instance_id': 'YOUR RETRIEVE AND RANK SERVICE INSTANCE ID',
        'cluster_id': 'YOUR RETRIEVE AND RANK SERVICE SOLR CLUSTER ID',
        'search_collection': 'YOUR RETRIEVE AND RANK SERVICE SOLR SEARCH COLLECTION NAME',
    },
}
metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
with open(example_html_path, 'r') as document:
    response = document_conversion.index_document(config=config, document=document, metadata=metadata)
    print(json.dumps(response, indent=2))

watson_developer_cloud/document_conversion_v1.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,16 @@ def convert_document(self, document, config, media_type=None):
3737
accept_json = config['conversion_target'] == DocumentConversionV1.ANSWER_UNITS
3838
return self.request(method='POST', url='/v1/convert_document', files=files, params=params,
3939
accept_json=accept_json)
40+
41+
def index_document(self, config, document=None, metadata=None, media_type=None):
    """
    POST a document and/or its metadata to the ``/v1/index_document`` endpoint.

    The request is assembled as a list of named file parts: a ``config`` part
    is always present, a ``file`` part is added when ``document`` is given,
    and a ``metadata`` part is added when ``metadata`` is given.

    :param config: JSON-serializable configuration; sent as ``config.json``
        with content type ``application/json``.
    :param document: optional file object to upload. Its ``name`` attribute
        is read and only the base name is used as the upload filename.
    :param metadata: optional JSON-serializable metadata; sent as
        ``metadata.json`` with content type ``application/json``.
    :param media_type: optional content type for the ``file`` part; left out
        of the part tuple when falsy.
    :return: whatever ``self.request`` returns for the call (made with
        ``accept_json=True``).
    :raises AssertionError: if both ``document`` and ``metadata`` are ``None``.
    """
    # Identity checks: ``== None`` / ``!= None`` would defer to a custom
    # ``__eq__`` / ``__ne__`` defined on the argument.
    if document is None and metadata is None:
        raise AssertionError('Missing required parameters: document or metadata. At least one of those is required.')
    params = {'version': self.version}
    files = [('config', ('config.json', json.dumps(config), 'application/json'))]
    if document is not None:
        filename = os.path.basename(document.name)
        file_tuple = (filename, document, media_type) if media_type else (filename, document)
        files.append(('file', file_tuple))
    if metadata is not None:
        files.append(('metadata', ('metadata.json', json.dumps(metadata), 'application/json')))
    return self.request(method='POST', url='/v1/index_document', files=files, params=params, accept_json=True)

0 commit comments

Comments
 (0)