|
# Convert the bundled example HTML file to normalized HTML and print the
# `.content` attribute of the returned response as-is.
# `config` stays bound at module level because the JSON example that follows
# mutates and reuses it.
html_path = join(dirname(__file__), '../resources/example.html')
with open(html_path, 'r') as document:
    config = {'conversion_target': DocumentConversionV1.NORMALIZED_HTML}
    response = document_conversion.convert_document(
        document=document, config=config, media_type='text/html')
    print(response.content)
17 | 17 |
|
# Example with JSON
# Reuses the existing `config` dict, switches its conversion target to
# ANSWER_UNITS, and prints the result serialized with a 2-space indent.
answer_units_source = join(dirname(__file__), '../resources/example.html')
with open(answer_units_source, 'r') as document:
    config['conversion_target'] = DocumentConversionV1.ANSWER_UNITS
    answer_units = document_conversion.convert_document(document=document, config=config)
    print(json.dumps(answer_units, indent=2))
| 22 | + |
# Examples of index_document API
# First example: only a document is passed, and the retrieve_and_rank section
# of the config sets 'dry_run' to 'true'.
print("########## Example of a dry run of index_document with only a document ##########")
document_only_path = join(dirname(__file__), '../resources/example.html')
with open(document_only_path, 'r') as document:
    config = {'retrieve_and_rank': {'dry_run': 'true'}}
    dry_run_result = document_conversion.index_document(config=config, document=document)
    print(json.dumps(dry_run_result, indent=2))
| 32 | + |
# Dry run that sends metadata only: no file is opened and no `document`
# argument is passed to index_document.
print("########## Example of a dry run of index_document with only metadata ##########")
config = {'retrieve_and_rank': {'dry_run': 'true'}}
metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
metadata_only_result = document_conversion.index_document(config=config, metadata=metadata)
print(json.dumps(metadata_only_result, indent=2))
| 45 | + |
# Dry run that sends both the example HTML document and a metadata entry.
print("########## Example of a dry run of index_document with document and metadata ##########")
document_and_metadata_path = join(dirname(__file__), '../resources/example.html')
with open(document_and_metadata_path, 'r') as document:
    config = {'retrieve_and_rank': {'dry_run': 'true'}}
    metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
    combined_result = document_conversion.index_document(
        config=config, document=document, metadata=metadata)
    print(json.dumps(combined_result, indent=2))
| 59 | + |
# Dry run that sends document and metadata, and also supplies a
# 'convert_document' section in the config alongside 'retrieve_and_rank'.
print("########## Example of a dry run of index_document with document, metadata, and additional config for conversion ##########")
conversion_config = {
    'normalized_html': {
        'exclude_content': {"xpaths": ["//body/div"]},
    },
}
custom_conversion_path = join(dirname(__file__), '../resources/example.html')
with open(custom_conversion_path, 'r') as document:
    config = {
        'convert_document': conversion_config,
        'retrieve_and_rank': {'dry_run': 'true'},
    }
    metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
    custom_result = document_conversion.index_document(
        config=config, document=document, metadata=metadata)
    print(json.dumps(custom_result, indent=2))
| 78 | + |
| 79 | +# print("########## Example of index_document with document, metadata (A service instance id, SOLR cluster id, and " |
| 80 | +# "a SOLR collection name must be provided from the Retrieve and Rank service in order to index) ##########") |
| 81 | +# with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: |
| 82 | +# config = { |
| 83 | +# 'retrieve_and_rank': { |
| 84 | +# 'dry_run': 'false', |
| 85 | +# 'service_instance_id': 'YOUR RETRIEVE AND RANK SERVICE INSTANCE ID', |
| 86 | +# 'cluster_id': 'YOUR RETRIEVE AND RANK SERVICE SOLR CLUSTER ID', |
| 87 | +# 'search_collection': 'YOUR RETRIEVE AND RANK SERVICE SOLR SEARCH COLLECTION NAME' |
| 88 | +# } |
| 89 | +# } |
| 90 | +# metadata = { |
| 91 | +# 'metadata': [ |
| 92 | +# {'name': 'id', 'value': '12345'} |
| 93 | +# ] |
| 94 | +# } |
| 95 | +# print(json.dumps(document_conversion.index_document(config=config, document=document, metadata=metadata), indent=2)) |
0 commit comments