@@ -128,8 +128,9 @@ def _process_episode(self, item, source_language, target_language, forced, hi, s
128128
129129 # Find source subtitle
130130 source_subtitle_path = subtitle_path
131+ detected_source_lang = None
131132 if not source_subtitle_path :
132- source_subtitle_path = self ._find_subtitle_by_language (
133+ source_subtitle_path , detected_source_lang = self ._find_subtitle_by_language (
133134 episode .subtitles , source_language , video_path , media_type = 'series'
134135 )
135136
@@ -139,6 +140,10 @@ def _process_episode(self, item, source_language, target_language, forced, hi, s
139140 'error' : f'No subtitle found for episode { sonarr_episode_id } (requested source: { source_language } )'
140141 }
141142
143+ # Use detected language if available
144+ if detected_source_lang :
145+ source_language = detected_source_lang
146+
142147 # Queue translation
143148 try :
144149 result = translate_subtitles_file (
@@ -178,8 +183,9 @@ def _process_movie(self, item, source_language, target_language, forced, hi, sub
178183
179184 # Find source subtitle
180185 source_subtitle_path = subtitle_path
186+ detected_source_lang = None
181187 if not source_subtitle_path :
182- source_subtitle_path = self ._find_subtitle_by_language (
188+ source_subtitle_path , detected_source_lang = self ._find_subtitle_by_language (
183189 movie .subtitles , source_language , video_path , media_type = 'movie'
184190 )
185191
@@ -189,6 +195,10 @@ def _process_movie(self, item, source_language, target_language, forced, hi, sub
189195 'error' : f'No subtitle found for movie { radarr_id } (requested source: { source_language } )'
190196 }
191197
198+ # Use detected language if available
199+ if detected_source_lang :
200+ source_language = detected_source_lang
201+
192202 # Queue translation
193203 try :
194204 result = translate_subtitles_file (
@@ -222,58 +232,44 @@ def _find_subtitle_by_language(self, subtitles, language_code, video_path, media
222232 media_type: Either 'movie' or 'series' for correct path mapping
223233
224234 Returns:
225- Path to the subtitle file, or None if no subtitles available
235+ Tuple of ( Path to the subtitle file, detected language code), or ( None, None) if no subtitles available
226236 """
227- import json
237+ import ast
228238 import os
229239
230240 logger .debug (f'Looking for "{ language_code } " subtitle. Subtitles data type: { type (subtitles )} ' )
231241
232- if not subtitles :
233- logger .debug ('No subtitles data found in database for this media' )
234- return None
235-
236- # Parse subtitles if it's a string (JSON)
237- if isinstance (subtitles , str ):
238- try :
239- subtitles = json .loads (subtitles )
240- except json .JSONDecodeError :
241- logger .error ('Failed to parse subtitles JSON from database' )
242- return None
243-
244- if not isinstance (subtitles , list ):
245- logger .debug (f'Subtitles is not a list: { type (subtitles )} ' )
246- return None
247-
248- logger .debug (f'Found { len (subtitles )} subtitle(s) in database' )
249-
250- # Collect available subtitles with their paths for better processing
251242 available_subtitles = []
252- for sub in subtitles :
253- if isinstance (sub , dict ):
254- sub_code = sub .get ('code2' , '' )
255- sub_path = sub .get ('path' , '' )
256- sub_hi = sub .get ('hi' , False )
257- sub_forced = sub .get ('forced' , False )
258-
259- if sub_path :
260- available_subtitles .append ({
261- 'code2' : sub_code ,
262- 'path' : sub_path ,
263- 'hi' : sub_hi ,
264- 'forced' : sub_forced
265- })
266-
267- available_codes = [s ['code2' ] for s in available_subtitles if s ['code2' ]]
268-
269- if available_codes :
270- logger .info (f'Available subtitle language codes: { available_codes } ' )
271- else :
272- logger .warning ('No language codes found in subtitle data' )
273243
274- if not available_subtitles :
275- logger .warning ('No subtitle files with valid paths found' )
276- return None
244+ if subtitles :
245+ # Parse subtitles if it's a string (Python literal from DB)
246+ if isinstance (subtitles , str ):
247+ try :
248+ subtitles = ast .literal_eval (subtitles )
249+ except (ValueError , SyntaxError ):
250+ logger .error ('Failed to parse subtitles from database' )
251+ subtitles = []
252+
253+ if isinstance (subtitles , list ):
254+ logger .debug (f'Found { len (subtitles )} subtitle(s) in database' )
255+
256+ # Collect available subtitles with their paths for better processing
257+ for sub in subtitles :
258+ # DB format is [lang_str, path, size]
259+ if isinstance (sub , (list , tuple )) and len (sub ) >= 2 :
260+ lang_parts = sub [0 ].split (':' )
261+ sub_code = lang_parts [0 ]
262+ sub_path = sub [1 ]
263+ sub_hi = len (lang_parts ) > 1 and lang_parts [1 ].lower () == 'hi'
264+ sub_forced = len (lang_parts ) > 1 and lang_parts [1 ].lower () == 'forced'
265+
266+ if sub_path :
267+ available_subtitles .append ({
268+ 'code2' : sub_code ,
269+ 'path' : sub_path ,
270+ 'hi' : sub_hi ,
271+ 'forced' : sub_forced
272+ })
277273
278274 # Helper function to resolve and validate subtitle path
279275 def resolve_subtitle_path (sub_path ):
@@ -294,7 +290,7 @@ def resolve_subtitle_path(sub_path):
294290
295291 return None
296292
297- # First pass: Look for exact language match
293+ # First pass: Look for exact language match in DB
298294 exact_matches = [s for s in available_subtitles if s ['code2' ] == language_code ]
299295
300296 # Sort matches: prefer non-HI, non-forced first, then HI, then forced
@@ -305,29 +301,105 @@ def resolve_subtitle_path(sub_path):
305301 if resolved_path :
306302 logger .info (f'Found exact language match "{ language_code } " at { resolved_path } '
307303 f'(hi={ sub ["hi" ]} , forced={ sub ["forced" ]} )' )
308- return resolved_path
304+ return resolved_path , sub [ 'code2' ]
309305
310- # Second pass: If no exact match found, try any available subtitle
311- logger .info (f'No exact match for "{ language_code } " found. '
312- f'Falling back to any available subtitle.' )
306+ # Second pass: If no exact match found in DB, try any available subtitle from DB
307+ if available_subtitles :
308+ logger .info (f'No exact match for "{ language_code } " found in DB. '
309+ f'Falling back to any available subtitle from DB.' )
310+
311+ # Sort all available: prefer non-HI, non-forced, and prioritize common languages
312+ common_languages = ['en' , 'eng' ] # English often has good quality subs
313+
314+ def sort_key (sub ):
315+ is_common = sub ['code2' ] in common_languages
316+ return (sub ['forced' ], sub ['hi' ], not is_common )
317+
318+ available_subtitles .sort (key = sort_key )
319+
320+ for sub in available_subtitles :
321+ resolved_path = resolve_subtitle_path (sub ['path' ])
322+ if resolved_path :
323+ logger .warning (f'Using fallback subtitle with language "{ sub ["code2" ]} " at { resolved_path } '
324+ f'(hi={ sub ["hi" ]} , forced={ sub ["forced" ]} ). '
325+ f'Requested language was "{ language_code } ".' )
326+ return resolved_path , sub ['code2' ]
327+
328+ # Third pass: Scan filesystem fallback
329+ logger .info (f'No usable subtitle found in DB. Scanning filesystem near { video_path } ' )
330+ filesystem_subs = self ._scan_filesystem_for_subtitles (video_path )
331+
332+ if filesystem_subs :
333+ # Prefer English
334+ for sub in filesystem_subs :
335+ if sub ['is_english' ]:
336+ logger .info (f'Found English subtitle on filesystem: { sub ["path" ]} ' )
337+ return sub ['path' ], 'en'
338+
339+ # Use first available
340+ sub = filesystem_subs [0 ]
341+ logger .info (f'Using non-English subtitle from filesystem: { sub ["path" ]} (detected: { sub ["detected_language" ]} )' )
342+ return sub ['path' ], sub ['detected_language' ]
313343
314- # Sort all available: prefer non-HI, non-forced, and prioritize common languages
315- common_languages = ['en' , 'eng' ] # English often has good quality subs
344+ logger .warning (f'No usable subtitle files found in DB or on filesystem.' )
345+ return None , None
346+
347+ def _scan_filesystem_for_subtitles (self , video_path ):
348+ """Scan filesystem for .srt files next to the video file."""
349+ import os
350+ import re
351+
352+ ENGLISH_PATTERNS = [
353+ r'\.en\.srt$' , r'\.eng\.srt$' , r'\.english\.srt$' ,
354+ r'[._-]en[._-]' , r'[._-]eng[._-]' , r'[._-]english[._-]' ,
355+ ]
316356
317- def sort_key ( sub ):
318- is_common = sub [ 'code2' ] in common_languages
319- return ( sub [ 'forced' ], sub [ 'hi' ], not is_common )
357+ video_dir = os . path . dirname ( video_path )
358+ video_name = os . path . splitext ( os . path . basename ( video_path ))[ 0 ]
359+ results = []
320360
321- available_subtitles .sort (key = sort_key )
361+ # Search directories
362+ search_dirs = [video_dir ]
363+ for subfolder in ['Subs' , 'Subtitles' , 'subs' , 'subtitles' , video_name ]:
364+ subdir = os .path .join (video_dir , subfolder )
365+ if os .path .isdir (subdir ):
366+ search_dirs .append (subdir )
322367
323- for sub in available_subtitles :
324- resolved_path = resolve_subtitle_path (sub ['path' ])
325- if resolved_path :
326- logger .warning (f'Using fallback subtitle with language "{ sub ["code2" ]} " at { resolved_path } '
327- f'(hi={ sub ["hi" ]} , forced={ sub ["forced" ]} ). '
328- f'Requested language was "{ language_code } ".' )
329- return resolved_path
368+ for directory in search_dirs :
369+ try :
370+ for filename in os .listdir (directory ):
371+ if filename .lower ().endswith ('.srt' ):
372+ full_path = os .path .join (directory , filename )
373+
374+ # Detect language from filename
375+ is_english = any (re .search (p , filename .lower ()) for p in ENGLISH_PATTERNS )
376+ detected_lang = 'en' if is_english else self ._detect_language_from_content (full_path )
377+
378+ results .append ({
379+ 'path' : full_path ,
380+ 'filename' : filename ,
381+ 'is_english' : is_english or detected_lang == 'en' ,
382+ 'detected_language' : detected_lang or 'und'
383+ })
384+ except OSError :
385+ continue
330386
331- logger .warning (f'No usable subtitle files found. '
332- f'Checked { len (available_subtitles )} subtitle(s), none exist on disk.' )
333- return None
387+ # Sort: English first
388+ results .sort (key = lambda x : (not x ['is_english' ], x ['filename' ]))
389+ return results
390+
391+ def _detect_language_from_content (self , srt_path ):
392+ """Detect language by analyzing subtitle content."""
393+ from guess_language import guess_language
394+ from charset_normalizer import detect
395+ try :
396+ with open (srt_path , 'rb' ) as f :
397+ raw = f .read (8192 ) # Read first 8KB
398+
399+ encoding = detect (raw )
400+ if encoding and encoding .get ('encoding' ):
401+ text = raw .decode (encoding ['encoding' ], errors = 'ignore' )
402+ return guess_language (text )
403+ except Exception :
404+ pass
405+ return None
0 commit comments