2121import json
2222
2323def from_file (filename , serverEndpoint = ServerEndpoint , xmlContent = False ):
24+ '''
25+ Parses a file for metadata and content
26+ :param filename: path to file which needs to be parsed
27+ :param serverEndpoint: Server endpoint url
28+ :param xmlContent: Whether or not XML content be requested.
29+ Default is 'False', which results in text content.
30+ :return: dictionary having 'metadata' and 'content' keys.
31+ 'content' has a str value and metadata has a dict type value.
32+ '''
2433 if not xmlContent :
2534 jsonOutput = parse1 ('all' , filename , serverEndpoint )
2635 else :
@@ -29,16 +38,29 @@ def from_file(filename, serverEndpoint=ServerEndpoint, xmlContent=False):
2938
3039
def from_buffer(string, serverEndpoint=ServerEndpoint, xmlContent=False):
    '''
    Parses content supplied as an in-memory buffer
    :param string: Buffer value that is sent to the server
    :param serverEndpoint: Server endpoint url. This is optional
    :param xmlContent: Whether or not XML content should be requested.
                Default is 'False', which results in text content.
    :return: the result of _parse() applied to the (status, response)
                pair obtained from the server call
    '''
    # Both modes issue the same 'put' call; only the resource path differs.
    resource = '/rmeta/xml' if xmlContent else '/rmeta/text'
    headers = {'Accept': 'application/json'}

    # Unpack explicitly so a malformed reply fails here, as it did before.
    status, response = callServer('put', serverEndpoint, resource, string,
                                  headers, False)

    return _parse((status, response))
4057
4158def _parse (jsonOutput ):
59+ '''
60+ Parses JSON response from Tika REST API server
61+ :param jsonOutput: JSON output from Tika Server
62+ :return: a dictionary having 'metadata' and 'content' values
63+ '''
4264 parsed = {}
4365 if not jsonOutput :
4466 return parsed
@@ -62,7 +84,7 @@ def _parse(jsonOutput):
6284 if n != "X-TIKA:content" :
6385 if n in parsed ["metadata" ]:
6486 if not isinstance (parsed ["metadata" ][n ], list ):
65- parsed ["metadata" ][n ] = [parsed ["metadata" ][n ]]
87+ parsed ["metadata" ][n ] = [parsed ["metadata" ][n ]]
6688 parsed ["metadata" ][n ].append (js [n ])
6789 else :
6890 parsed ["metadata" ][n ] = js [n ]
0 commit comments