@@ -136,7 +136,7 @@ def make_content_disposition_header(fn):
136136log_file = os .path .join (log_path , 'tika.log' )
137137
138138logFormatter = logging .Formatter ("%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s" )
139- log = logging .getLogger ()
139+ log = logging .getLogger ('tika.tika' )
140140
141141# File logs
142142fileHandler = logging .FileHandler (log_file )
@@ -272,7 +272,7 @@ def parseAndSave(option, urlOrPaths, outDir=None, serverEndpoint=ServerEndpoint,
272272
273273def parse (option , urlOrPaths , serverEndpoint = ServerEndpoint , verbose = Verbose , tikaServerJar = TikaServerJar ,
274274 responseMimeType = 'application/json' ,
275- services = {'meta' : '/meta' , 'text' : '/tika' , 'all' : '/rmeta' }):
275+ services = {'meta' : '/meta' , 'text' : '/tika' , 'all' : '/rmeta' }, rawResponse = False ):
276276 '''
277277 Parse the objects and return extracted metadata and/or text in JSON format.
278278 :param option:
@@ -289,7 +289,7 @@ def parse(option, urlOrPaths, serverEndpoint=ServerEndpoint, verbose=Verbose, ti
289289
290290def parse1 (option , urlOrPath , serverEndpoint = ServerEndpoint , verbose = Verbose , tikaServerJar = TikaServerJar ,
291291 responseMimeType = 'application/json' ,
292- services = {'meta' : '/meta' , 'text' : '/tika' , 'all' : '/rmeta/text' }):
292+ services = {'meta' : '/meta' , 'text' : '/tika' , 'all' : '/rmeta/text' }, rawResponse = False ):
293293 '''
294294 Parse the object and return extracted metadata and/or text in JSON format.
295295 :param option:
@@ -307,11 +307,9 @@ def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, ti
307307 service = services .get (option , services ['all' ])
308308 if service == '/tika' : responseMimeType = 'text/plain'
309309 status , response = callServer ('put' , serverEndpoint , service , open (path , 'rb' ),
310- {
311- 'Accept' : responseMimeType ,
312- 'Content-Disposition' : make_content_disposition_header (path )
313- },
314- verbose , tikaServerJar )
310+ {'Accept' : responseMimeType , 'Content-Disposition' : make_content_disposition_header (path )},
311+ verbose , tikaServerJar , rawResponse = rawResponse )
312+
315313
316314 if file_type == 'remote' : os .unlink (path )
317315 return (status , response )
@@ -479,8 +477,9 @@ def getConfig(option, serverEndpoint=ServerEndpoint, verbose=Verbose, tikaServer
479477 return (status , response )
480478
481479
482- def callServer (verb , serverEndpoint , service , data , headers , verbose = Verbose , tikaServerJar = TikaServerJar ,
483- httpVerbs = {'get' : requests .get , 'put' : requests .put , 'post' : requests .post },classpath = None ):
480+ def callServer (verb , serverEndpoint , service , data , headers , verbose = Verbose , tikaServerJar = TikaServerJar ,
481+ httpVerbs = {'get' : requests .get , 'put' : requests .put , 'post' : requests .post }, classpath = None ,
482+ rawResponse = False ):
484483 '''
485484 Call the Tika Server, do some error checking, and return the response.
486485 :param verb:
@@ -522,8 +521,12 @@ def callServer(verb, serverEndpoint, service, data, headers, verbose=Verbose, ti
522521 print (sys .stderr , "Response headers: " , resp .headers )
523522 if resp .status_code != 200 :
524523 log .warning ('Tika server returned status: %d' , resp .status_code )
524+
525525 resp .encoding = "utf-8"
526- return (resp .status_code , resp .text )
526+ if rawResponse :
527+ return (resp .status_code , resp .content )
528+ else :
529+ return (resp .status_code , resp .text )
527530
528531
529532def checkTikaServer (serverHost = ServerHost , port = Port , tikaServerJar = TikaServerJar ,classpath = None ):
0 commit comments