@@ -49,23 +49,41 @@ def check_tika_jar_available() -> Tuple[bool, Optional[Path]]:
4949 Returns:
5050 Tuple of (is_available, jar_path)
5151 """
52- # Tika downloads to /tmp on Unix systems
53- possible_paths = [
52+ # Check environment variable first (for Docker containers)
53+ env_path = os .getenv ('TIKA_JAR_PATH' )
54+ if env_path :
55+ jar_path = Path (env_path )
56+ if jar_path .exists ():
57+ logger .debug (f"Tika JAR found via TIKA_JAR_PATH: { jar_path } " )
58+ return True , jar_path
59+
60+ # Static paths to check
61+ static_paths = [
5462 Path ('/tmp/tika-server.jar' ),
55- Path ('/var/folders' ).rglob ('tika-server.jar' ), # macOS temp
5663 Path .home () / '.tika' / 'tika-server.jar' ,
5764 ]
5865
59- for path in possible_paths :
60- if isinstance (path , Path ) and path .exists ():
66+ for path in static_paths :
67+ if path .exists ():
68+ logger .debug (f"Tika JAR found at: { path } " )
6169 return True , path
62- # For glob results
63- try :
64- for found_path in path :
65- if found_path .exists ():
66- return True , found_path
67- except (TypeError , AttributeError ):
68- continue
70+
71+ # Glob patterns for versioned JARs (tika-server-X.Y.Z.jar)
72+ glob_patterns = [
73+ (Path ('/tmp' ), 'tika-server*.jar' ),
74+ (Path .home () / '.tika' , 'tika-server*.jar' ),
75+ (Path ('/var/folders' ), '**/tika-server*.jar' ), # macOS temp
76+ ]
77+
78+ for base_path , pattern in glob_patterns :
79+ if base_path .exists ():
80+ try :
81+ for found_path in base_path .glob (pattern ):
82+ if found_path .exists () and found_path .is_file ():
83+ logger .debug (f"Tika JAR found via glob: { found_path } " )
84+ return True , found_path
85+ except (PermissionError , OSError ):
86+ continue
6987
7088 return False , None
7189
@@ -239,13 +257,34 @@ def ensure_tika_ready(interactive: bool = True, auto_skip: bool = False) -> bool
239257
def is_tika_available() -> bool:
    """
    Check if Tika is available for use.

    First checks via check_tika_available(). If that reports Tika as
    unusable but Java is installed, falls back to asking tika-python where
    it keeps its server JAR, in case the JAR is in an unexpected location.

    Returns:
        True if Tika can be used, False otherwise.
    """
    # First try the standard check.
    status = check_tika_available()
    if status['can_use_tika']:
        return True

    # The fallback only makes sense when Java is present to run the JAR.
    if not status['java_installed']:
        return False

    try:
        from tika import tika

        jar_location = getattr(tika, 'TikaJarPath', None)
        if jar_location:
            candidate = Path(jar_location)
            # tika-python's TikaJarPath is the *directory* that holds
            # 'tika-server.jar' (TIKA_PATH, defaulting to the temp dir),
            # not the JAR itself. Checking only exists() on it would be
            # true for nearly every system, so resolve to the actual file.
            if candidate.is_dir():
                candidate = candidate / 'tika-server.jar'
            if candidate.is_file():
                logger.info(f"Tika JAR found via tika-python: {candidate}")
                return True
    except ImportError:
        # tika-python is not installed; nothing more to check.
        pass
    except Exception as e:
        # Importing tika.tika runs module-level setup that may fail;
        # treat any such failure as "not available" rather than crashing.
        logger.debug(f"Tika-python fallback check failed: {e}")

    return False
249288
250289
251290def print_tika_status ():
0 commit comments