@@ -301,7 +301,7 @@ def parse(option, urlOrPaths, serverEndpoint=ServerEndpoint, verbose=Verbose, ti
301301
302302def parse1 (option , urlOrPath , serverEndpoint = ServerEndpoint , verbose = Verbose , tikaServerJar = TikaServerJar ,
303303 responseMimeType = 'application/json' ,
304- services = {'meta' : '/meta' , 'text' : '/tika' , 'all' : '/rmeta/text' }, rawResponse = False , headers = None , config_path = None ):
304+ services = {'meta' : '/meta' , 'text' : '/tika' , 'all' : '/rmeta/text' }, rawResponse = False , headers = None , config_path = None , requestOptions = {} ):
305305 '''
306306 Parse the object and return extracted metadata and/or text in JSON format.
307307 :param option:
@@ -326,7 +326,7 @@ def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, ti
326326 if service == '/tika' : responseMimeType = 'text/plain'
327327 headers .update ({'Accept' : responseMimeType , 'Content-Disposition' : make_content_disposition_header (path )})
328328 status , response = callServer ('put' , serverEndpoint , service , open (path , 'rb' ),
329- headers , verbose , tikaServerJar , config_path = config_path , rawResponse = rawResponse )
329+ headers , verbose , tikaServerJar , config_path = config_path , rawResponse = rawResponse , requestOptions = requestOptions )
330330
331331 if file_type == 'remote' : os .unlink (path )
332332 return (status , response )
@@ -351,7 +351,7 @@ def detectLang(option, urlOrPaths, serverEndpoint=ServerEndpoint, verbose=Verbos
351351
352352def detectLang1 (option , urlOrPath , serverEndpoint = ServerEndpoint , verbose = Verbose , tikaServerJar = TikaServerJar ,
353353 responseMimeType = 'text/plain' ,
354- services = {'file' : '/language/stream' }):
354+ services = {'file' : '/language/stream' }, requestOptions = {} ):
355355 '''
356356 Detect the language of the provided stream and return its 2 character code as text/plain.
357357 :param option:
@@ -369,7 +369,7 @@ def detectLang1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbos
369369 raise TikaException ('Language option must be one of %s ' % binary_string (services .keys ()))
370370 service = services [option ]
371371 status , response = callServer ('put' , serverEndpoint , service , open (path , 'rb' ),
372- {'Accept' : responseMimeType }, verbose , tikaServerJar )
372+ {'Accept' : responseMimeType }, verbose , tikaServerJar , requestOptions = requestOptions )
373373 return (status , response )
374374
375375def doTranslate (option , urlOrPaths , serverEndpoint = ServerEndpoint , verbose = Verbose , tikaServerJar = TikaServerJar ,
@@ -392,7 +392,7 @@ def doTranslate(option, urlOrPaths, serverEndpoint=ServerEndpoint, verbose=Verbo
392392
393393def doTranslate1 (option , urlOrPath , serverEndpoint = ServerEndpoint , verbose = Verbose , tikaServerJar = TikaServerJar ,
394394 responseMimeType = 'text/plain' ,
395- services = {'all' : '/translate/all' }):
395+ services = {'all' : '/translate/all' }, requestOptions = {} ):
396396 '''
397397
398398 :param option:
@@ -424,7 +424,7 @@ def doTranslate1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbo
424424 service = services ["all" ] + "/" + Translator + "/" + destLang
425425 status , response = callServer ('put' , serverEndpoint , service , open (path , 'rb' ),
426426 {'Accept' : responseMimeType },
427- verbose , tikaServerJar )
427+ verbose , tikaServerJar , requestOptions = requestOptions )
428428 return (status , response )
429429
430430def detectType (option , urlOrPaths , serverEndpoint = ServerEndpoint , verbose = Verbose , tikaServerJar = TikaServerJar ,
@@ -447,7 +447,7 @@ def detectType(option, urlOrPaths, serverEndpoint=ServerEndpoint, verbose=Verbos
447447
448448def detectType1 (option , urlOrPath , serverEndpoint = ServerEndpoint , verbose = Verbose , tikaServerJar = TikaServerJar ,
449449 responseMimeType = 'text/plain' ,
450- services = {'type' : '/detect/stream' }, config_path = None ):
450+ services = {'type' : '/detect/stream' }, config_path = None , requestOptions = {} ):
451451 '''
452452 Detect the MIME/media type of the stream and return it in text/plain.
453453 :param option:
@@ -469,14 +469,14 @@ def detectType1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbos
469469 'Accept' : responseMimeType ,
470470 'Content-Disposition' : make_content_disposition_header (path )
471471 },
472- verbose , tikaServerJar , config_path = config_path )
472+ verbose , tikaServerJar , config_path = config_path , requestOptions = requestOptions )
473473 if csvOutput == 1 :
474474 return (status , urlOrPath .decode ("UTF-8" ) + "," + response )
475475 else :
476476 return (status , response )
477477
478478def getConfig (option , serverEndpoint = ServerEndpoint , verbose = Verbose , tikaServerJar = TikaServerJar , responseMimeType = 'application/json' ,
479- services = {'mime-types' : '/mime-types' , 'detectors' : '/detectors' , 'parsers' : '/parsers/details' }):
479+ services = {'mime-types' : '/mime-types' , 'detectors' : '/detectors' , 'parsers' : '/parsers/details' }, requestOptions = {} ):
480480 '''
481481 Get the configuration of the Tika Server (parsers, detectors, etc.) and return it in JSON format.
482482 :param option:
@@ -490,13 +490,12 @@ def getConfig(option, serverEndpoint=ServerEndpoint, verbose=Verbose, tikaServer
490490 if option not in services :
491491 die ('config option must be one of mime-types, detectors, or parsers' )
492492 service = services [option ]
493- status , response = callServer ('get' , serverEndpoint , service , None , {'Accept' : responseMimeType }, verbose , tikaServerJar )
493+ status , response = callServer ('get' , serverEndpoint , service , None , {'Accept' : responseMimeType }, verbose , tikaServerJar , requestOptions = requestOptions )
494494 return (status , response )
495495
496-
497496def callServer (verb , serverEndpoint , service , data , headers , verbose = Verbose , tikaServerJar = TikaServerJar ,
498497 httpVerbs = {'get' : requests .get , 'put' : requests .put , 'post' : requests .post }, classpath = None ,
499- rawResponse = False ,config_path = None ):
498+ rawResponse = False ,config_path = None , requestOptions = {} ):
500499 '''
501500 Call the Tika Server, do some error checking, and return the response.
502501 :param verb:
@@ -535,7 +534,15 @@ def callServer(verb, serverEndpoint, service, data, headers, verbose=Verbose, ti
535534 if type (data ) is unicode_string :
536535 encodedData = data .encode ('utf-8' )
537536
538- resp = verbFn (serviceUrl , encodedData , headers = headers , verify = False )
537+ requestOptionsDefault = {
538+ 'timeout' : 60 ,
539+ 'headers' : headers ,
540+ 'verify' : False
541+ }
542+ effectiveRequestOptions = requestOptionsDefault .copy ()
543+ effectiveRequestOptions .update (requestOptions )
544+
545+ resp = verbFn (serviceUrl , encodedData , ** effectiveRequestOptions )
539546 if verbose :
540547 print (sys .stderr , "Request headers: " , headers )
541548 print (sys .stderr , "Response headers: " , resp .headers )
0 commit comments