@@ -168,6 +168,9 @@ def make_content_disposition_header(fn):
168168 "org.apache.tika.language.translate.Lingo24Translator" )
# Raw TIKA_CLIENT_ONLY value from the environment, or False when it is unset.
TikaClientOnly = os.environ.get('TIKA_CLIENT_ONLY', False)
# Extra classpath taken from TIKA_SERVER_CLASSPATH; empty string when unset.
TikaServerClasspath = os.environ.get('TIKA_SERVER_CLASSPATH', '')
# TIKA_STARTUP_SLEEP converted to float; defaults to 5.
TikaStartupSleep = float(os.environ.get('TIKA_STARTUP_SLEEP', 5))
# TIKA_STARTUP_MAX_RETRY converted to int; defaults to 3.
TikaStartupMaxRetry = int(os.environ.get('TIKA_STARTUP_MAX_RETRY', 3))
# Java executable taken from TIKA_JAVA; defaults to "java".
TikaJava = os.environ.get("TIKA_JAVA", "java")

Verbose = 0
EncodeUtf8 = 0
@@ -553,16 +556,19 @@ def checkTikaServer(scheme="http", serverHost=ServerHost, port=Port, tikaServerJ
553556 jarPath = os .path .join (TikaJarPath , 'tika-server.jar' )
554557 if 'localhost' in serverEndpoint or '127.0.0.1' in serverEndpoint :
555558 alreadyRunning = checkPortIsOpen (serverHost , port )
556-
559+
557560 if not alreadyRunning :
558561 if not os .path .isfile (jarPath ) and urlp .scheme != '' :
559- getRemoteJar (tikaServerJar , jarPath )
560-
562+ getRemoteJar (tikaServerJar , jarPath )
563+
561564 if not checkJarSig (tikaServerJar , jarPath ):
562565 os .remove (jarPath )
563566 tikaServerJar = getRemoteJar (tikaServerJar , jarPath )
564-
565- startServer (jarPath , serverHost , port , classpath )
567+
568+ status = startServer (jarPath , TikaJava , serverHost , port , classpath )
569+ if not status :
570+ log .error ("Failed to receive startup confirmation from startServer." )
571+ raise RuntimeError ("Unable to start Tika server." )
566572 return serverEndpoint
567573
568574def checkJarSig (tikaServerJar , jarPath ):
@@ -583,7 +589,7 @@ def checkJarSig(tikaServerJar, jarPath):
583589 return existingContents == m .hexdigest ()
584590
585591
def startServer(tikaServerJar, java_path=TikaJava, serverHost=ServerHost, port=Port, classpath=None):
    '''
    Starts Tika Server and waits for its startup message to appear in the log.

    The server is launched through the shell with stdout/stderr redirected to
    ``tika-server.log`` under ``TikaServerLogFilePath``; that log is then polled
    up to ``TikaStartupMaxRetry`` times, ``TikaStartupSleep`` seconds apart.

    :param tikaServerJar: path to tika server jar
    :param java_path: java executable used to launch the server
    :param serverHost: not used when building the launch command; the server is
        always bound to "localhost" ("0.0.0.0" on Windows)
    :param port: port passed to the server via ``--port``
    :param classpath: classpath entries placed before the jar; falls back to
        ``TikaServerClasspath`` when None
    :return: True once the startup message is seen; False if the log file cannot
        be created, java cannot be found, or the message never appears
    '''
    # Local import: only the java availability probe below needs it.
    import shutil

    if classpath is None:
        classpath = TikaServerClasspath

    host = "0.0.0.0" if Windows else "localhost"

    if classpath:
        # os.pathsep is ':' on POSIX and ';' on Windows, matching what the JVM
        # expects for -cp on each platform.
        classpath += os.pathsep + tikaServerJar
    else:
        classpath = tikaServerJar

    # NOTE(review): the command is interpolated into a shell string, so paths
    # containing spaces or shell metacharacters are not quoted -- confirm the
    # inputs are trusted before relying on this.
    cmd_string = '%s -cp %s org.apache.tika.server.TikaServerCli --port %i --host %s &' \
        % (java_path, classpath, port, host)

    # Check that we can write to log path
    tika_log_file_path = os.path.join(TikaServerLogFilePath, 'tika-server.log')
    try:
        logFile = open(tika_log_file_path, 'w')
    except PermissionError:
        log.error("Unable to create tika-server.log at %s due to permission error.",
                  TikaServerLogFilePath)
        return False

    # The child process gets its own copy of the log handle, so ours is closed
    # as soon as the launch attempt is over (including the early return).
    with logFile:
        # Check that the specified java binary can be resolved, without
        # spawning (and leaking) a throwaway process just to find out.
        if shutil.which(java_path) is None:
            log.error("Unable to run java; is it installed?")
            return False

        # Run java with jar args
        Popen(cmd_string, stdout=logFile, stderr=STDOUT, shell=True)

    # Poll the log for the INFO line that confirms the listening endpoint and
    # stop as soon as it shows up instead of sleeping through every retry.
    for attempt in range(TikaStartupMaxRetry):
        with open(tika_log_file_path, "r") as tika_log_file_tmp:
            if "Started Apache Tika server at" in tika_log_file_tmp.read():
                return True
        log.warning("Failed to see startup log message; retrying...")
        # No further check follows the last attempt, so sleeping there would
        # only delay the failure report.
        if attempt < TikaStartupMaxRetry - 1:
            time.sleep(TikaStartupSleep)

    log.error("Tika startup log message not received after %d tries.", TikaStartupMaxRetry)
    return False
611653
612654def toFilename (urlOrPath ):
613655 value = re .sub ('[^\w\s-]' , '-' , urlOrPath ).strip ().lower ()
0 commit comments