4747 # Use auto Language detection feature
4848 print(translate.from_file('/path/to/file', 'destLang'))
4949
50+ ***Tika-Python Configuration***
51+ You can now use custom configuration files. See https://tika.apache.org/1.18/configuring.html
52+ for details on writing configuration files. Configuration is set the first time the server is started.
53+ To use a configuration file with a parser or detector:
54+ parsed = parser.from_file('/path/to/file', config_path='/path/to/configfile')
55+ or:
56+ detected = detector.from_file('/path/to/file', config_path='/path/to/configfile')
57+ or:
58+ detected = detector.from_buffer('some buffered content', config_path='/path/to/configfile')
5059
5160'''
5261
@@ -152,7 +161,7 @@ def make_content_disposition_header(fn):
152161log .setLevel (logging .INFO )
153162
154163Windows = True if platform .system () == "Windows" else False
155- TikaVersion = os .getenv ('TIKA_VERSION' , '1.18 ' )
164+ TikaVersion = os .getenv ('TIKA_VERSION' , '1.19 ' )
156165TikaJarPath = os .getenv ('TIKA_PATH' , tempfile .gettempdir ())
157166TikaFilesPath = tempfile .gettempdir ()
158167TikaServerLogFilePath = log_path
@@ -292,7 +301,7 @@ def parse(option, urlOrPaths, serverEndpoint=ServerEndpoint, verbose=Verbose, ti
292301
293302def parse1 (option , urlOrPath , serverEndpoint = ServerEndpoint , verbose = Verbose , tikaServerJar = TikaServerJar ,
294303 responseMimeType = 'application/json' ,
295- services = {'meta' : '/meta' , 'text' : '/tika' , 'all' : '/rmeta/text' }, rawResponse = False , headers = None ):
304+ services = {'meta' : '/meta' , 'text' : '/tika' , 'all' : '/rmeta/text' }, rawResponse = False , headers = None , config_path = None ):
296305 '''
297306 Parse the object and return extracted metadata and/or text in JSON format.
298307 :param option:
@@ -316,7 +325,7 @@ def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, ti
316325 service = services .get (option , services ['all' ])
317326 if service == '/tika' : responseMimeType = 'text/plain'
318327 status , response = callServer ('put' , serverEndpoint , service , open (path , 'rb' ),
319- headers , verbose , tikaServerJar , rawResponse = rawResponse )
328+ headers , verbose , tikaServerJar , config_path = config_path , rawResponse = rawResponse )
320329
321330 if file_type == 'remote' : os .unlink (path )
322331 return (status , response )
@@ -437,7 +446,7 @@ def detectType(option, urlOrPaths, serverEndpoint=ServerEndpoint, verbose=Verbos
437446
438447def detectType1 (option , urlOrPath , serverEndpoint = ServerEndpoint , verbose = Verbose , tikaServerJar = TikaServerJar ,
439448 responseMimeType = 'text/plain' ,
440- services = {'type' : '/detect/stream' }):
449+ services = {'type' : '/detect/stream' }, config_path = None ):
441450 '''
442451 Detect the MIME/media type of the stream and return it in text/plain.
443452 :param option:
@@ -459,7 +468,7 @@ def detectType1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbos
459468 'Accept' : responseMimeType ,
460469 'Content-Disposition' : make_content_disposition_header (path )
461470 },
462- verbose , tikaServerJar )
471+ verbose , tikaServerJar , config_path = config_path )
463472 if csvOutput == 1 :
464473 return (status , urlOrPath .decode ("UTF-8" ) + "," + response )
465474 else :
@@ -486,7 +495,7 @@ def getConfig(option, serverEndpoint=ServerEndpoint, verbose=Verbose, tikaServer
486495
487496def callServer (verb , serverEndpoint , service , data , headers , verbose = Verbose , tikaServerJar = TikaServerJar ,
488497 httpVerbs = {'get' : requests .get , 'put' : requests .put , 'post' : requests .post }, classpath = None ,
489- rawResponse = False ):
498+ rawResponse = False , config_path = None ):
490499 '''
491500 Call the Tika Server, do some error checking, and return the response.
492501 :param verb:
@@ -510,14 +519,14 @@ def callServer(verb, serverEndpoint, service, data, headers, verbose=Verbose, ti
510519
511520 global TikaClientOnly
512521 if not TikaClientOnly :
513- serverEndpoint = checkTikaServer (scheme , serverHost , port , tikaServerJar , classpath )
522+ serverEndpoint = checkTikaServer (scheme , serverHost , port , tikaServerJar , classpath , config_path )
514523
515524 serviceUrl = serverEndpoint + service
516525 if verb not in httpVerbs :
517526 log .exception ('Tika Server call must be one of %s' % binary_string (httpVerbs .keys ()))
518527 raise TikaException ('Tika Server call must be one of %s' % binary_string (httpVerbs .keys ()))
519528 verbFn = httpVerbs [verb ]
520-
529+
521530 if Windows and hasattr (data , "read" ):
522531 data = data .read ()
523532
@@ -539,7 +548,7 @@ def callServer(verb, serverEndpoint, service, data, headers, verbose=Verbose, ti
539548 return (resp .status_code , resp .text )
540549
541550
542- def checkTikaServer (scheme = "http" , serverHost = ServerHost , port = Port , tikaServerJar = TikaServerJar , classpath = None ):
551+ def checkTikaServer (scheme = "http" , serverHost = ServerHost , port = Port , tikaServerJar = TikaServerJar , classpath = None , config_path = None ):
543552 '''
544553 Check that tika-server is running. If not, download JAR file and start it up.
545554 :param scheme: e.g. http or https
@@ -565,7 +574,7 @@ def checkTikaServer(scheme="http", serverHost=ServerHost, port=Port, tikaServerJ
565574 os .remove (jarPath )
566575 tikaServerJar = getRemoteJar (tikaServerJar , jarPath )
567576
568- status = startServer (jarPath , TikaJava , serverHost , port , classpath )
577+ status = startServer (jarPath , TikaJava , serverHost , port , classpath , config_path )
569578 if not status :
570579 log .error ("Failed to receive startup confirmation from startServer." )
571580 raise RuntimeError ("Unable to start Tika server." )
@@ -589,7 +598,7 @@ def checkJarSig(tikaServerJar, jarPath):
589598 return existingContents == m .hexdigest ()
590599
591600
592- def startServer (tikaServerJar , java_path = TikaJava , serverHost = ServerHost , port = Port , classpath = None ):
601+ def startServer (tikaServerJar , java_path = TikaJava , serverHost = ServerHost , port = Port , classpath = None , config_path = None ):
593602 '''
594603 Starts Tika Server
595604 :param tikaServerJar: path to tika server jar
@@ -611,8 +620,13 @@ def startServer(tikaServerJar, java_path = TikaJava, serverHost = ServerHost, po
611620 classpath = tikaServerJar
612621
613622 # setup command string
614- cmd_string = '%s -cp %s org.apache.tika.server.TikaServerCli --port %i --host %s &' \
615- % (java_path , classpath , port , host )
623+ cmd_string = ""
624+ if not config_path :
625+ cmd_string = '%s -cp %s org.apache.tika.server.TikaServerCli --port %i --host %s &' \
626+ % (java_path , classpath , port , host )
627+ else :
628+ cmd_string = '%s -cp %s org.apache.tika.server.TikaServerCli --port %i --host %s --config %s &' \
629+ % (java_path , classpath , port , host , config_path )
616630
617631 # Check that we can write to log path
618632 try :
0 commit comments