11#!/usr/bin/env python
22# -*- coding: utf-8 -*-
3+ import importlib .util
34import logging
45import os
5- import shutil
66from pathlib import Path
77from subprocess import PIPE , STDOUT , Popen , run , check_output
88from packaging import version
@@ -190,31 +190,30 @@ def _check_pgsrip_dependencies(self) -> bool:
190190 if not self .app .fastflix .config .mkvmerge_path :
191191 missing .append ("mkvtoolnix" )
192192
193- # Check pgsrip
194- if not self . app . fastflix . config . pgsrip_path :
195- missing .append ("pgsrip" )
193+ # Check if pgsrip Python library is available
194+ if importlib . util . find_spec ( "pgsrip" ) is None :
195+ missing .append ("pgsrip (Python library) " )
196196
197197 if missing :
198198 self .main .thread_logging_signal .emit (
199199 f"ERROR:{ t ('Missing dependencies for PGS OCR' )} : { ', ' .join (missing )} \n \n "
200200 f"Install instructions:\n "
201- f" Windows: Run setup_pgs_ocr_windows.bat in FastFlix folder\n "
202- f" Linux: sudo apt install tesseract-ocr mkvtoolnix && pip install pgsrip\n "
203- f" macOS: brew install tesseract mkvtoolnix && pip install pgsrip\n \n "
204- f"Or download manually:\n "
205- f" Tesseract: https://github.com/UB-Mannheim/tesseract/wiki\n "
206- f" MKVToolNix: https://mkvtoolnix.download/downloads.html\n "
207- f" pgsrip: pip install pgsrip"
201+ f" pgsrip: pip install pgsrip\n "
202+ f" Linux: sudo apt install tesseract-ocr mkvtoolnix\n "
203+ f" macOS: brew install tesseract mkvtoolnix\n "
204+ f" Windows:\n "
205+ f" - Tesseract: https://github.com/UB-Mannheim/tesseract/wiki\n "
206+ f" - MKVToolNix: https://mkvtoolnix.download/downloads.html"
208207 )
209208 return False
210209
211210 return True
212211
213212 def _convert_sup_to_srt (self , sup_filepath : str ) -> bool :
214- """Convert an already-extracted .sup file to .srt using pgsrip OCR
213+ """Convert PGS subtitle to .srt using pgsrip OCR by processing the original MKV
215214
216215 Args:
217- sup_filepath: Path to the extracted .sup file
216+ sup_filepath: Path to the extracted .sup file (used for naming output)
218217
219218 Returns:
220219 True if conversion successful, False otherwise
@@ -228,80 +227,87 @@ def _convert_sup_to_srt(self, sup_filepath: str) -> bool:
228227 f"INFO:{ t ('Converting .sup to .srt using OCR' )} (this may take 3-5 minutes)..."
229228 )
230229
231- # Convert 3-letter language code to 2-letter for pgsrip
232- # pgsrip uses 2-letter codes in filenames (e.g., "en" not "eng")
233- from fastflix .language import Language
234- try :
235- lang_2letter = Language (self .language ).pt1 # Convert eng -> en
236- except :
237- lang_2letter = "en" # Default to English if conversion fails
238-
239- # Rename .sup file to use 2-letter language code (what pgsrip expects)
240- sup_path = Path (sup_filepath )
241- if f".{ self .language } ." in sup_path .name :
242- # Replace 3-letter with 2-letter in filename
243- new_name = sup_path .name .replace (f".{ self .language } ." , f".{ lang_2letter } ." )
244- new_sup_path = sup_path .parent / new_name
245- sup_path .rename (new_sup_path )
246- sup_filepath = str (new_sup_path )
247-
248- # Run pgsrip on the already-extracted .sup file
249- pgsrip_cmd = str (self .app .fastflix .config .pgsrip_path ) if self .app .fastflix .config .pgsrip_path else "pgsrip"
230+ # Import pgsrip Python API
231+ from pgsrip import pgsrip , Mkv , Options
232+ from babelfish import Language as BabelLanguage
250233
251234 # Set environment variables for pgsrip to find tesseract
252- import os
253- env = os .environ .copy ()
254235 if self .app .fastflix .config .tesseract_path :
255236 # Add tesseract directory to PATH so pytesseract can find it
256237 tesseract_dir = str (Path (self .app .fastflix .config .tesseract_path ).parent )
257- env [ ' PATH' ] = f"{ tesseract_dir } { os .pathsep } { env .get ('PATH' , '' )} "
258- env [ ' TESSERACT_CMD' ] = str (self .app .fastflix .config .tesseract_path )
238+ os . environ [ " PATH" ] = f"{ tesseract_dir } { os .pathsep } { os . environ .get ('PATH' , '' )} "
239+ os . environ [ " TESSERACT_CMD" ] = str (self .app .fastflix .config .tesseract_path )
259240
260- pgsrip_result = run (
261- [
262- pgsrip_cmd ,
263- "--language" , lang_2letter , # Use 2-letter code (e.g., "en", "es", "fr")
264- "--force" , # Overwrite existing files
265- sup_filepath
266- ],
267- capture_output = True ,
268- text = True ,
269- timeout = 600 , # 10 minute timeout for OCR
270- env = env # Pass environment with TESSERACT_CMD
271- )
241+ # pgsrip needs the original MKV file, not the extracted .sup
242+ # The .sup file is a raw subtitle stream, but pgsrip expects an MKV container
243+ sup_path = Path (sup_filepath )
244+ media = Mkv (self .main .input_video )
245+
246+ # Configure options for pgsrip
247+ # BabelLanguage needs different constructors for 2-letter vs 3-letter codes
248+ try :
249+ # Detect if language code is 2-letter or 3-letter
250+ if len (self .language ) == 2 :
251+ babel_lang = BabelLanguage .fromalpha2 (self .language )
252+ elif len (self .language ) == 3 :
253+ babel_lang = BabelLanguage (self .language )
254+ else :
255+ # Try as language name
256+ babel_lang = BabelLanguage .fromname (self .language )
257+
258+ options = Options (
259+ languages = {babel_lang },
260+ overwrite = True , # Overwrite existing .srt files
261+ one_per_lang = True , # Create one .srt per language
262+ )
263+ except Exception :
264+ # Fallback to English if language code is invalid
265+ options = Options (
266+ languages = {BabelLanguage ("eng" )},
267+ overwrite = True ,
268+ one_per_lang = True ,
269+ )
272270
273- if pgsrip_result . returncode != 0 :
274- error_msg = pgsrip_result . stderr if pgsrip_result . stderr else pgsrip_result . stdout
275- raise Exception ( f" pgsrip failed with return code { pgsrip_result . returncode } : { error_msg } " )
271+ # Run pgsrip conversion using Python API on the original MKV
272+ # This will create .srt files in the same directory as the video
273+ pgsrip . rip ( media , options )
276274
277- # pgsrip creates .srt file in same directory as .sup file
278- sup_path = Path (sup_filepath )
279- expected_srt = sup_path .with_suffix ('.srt' )
275+ # Look for the created .srt file
276+ # pgsrip creates files with pattern: basename.language.srt
277+ video_path = Path (self .main .input_video )
278+ srt_pattern = f"{ video_path .stem } .*.srt"
279+ srt_files = list (video_path .parent .glob (srt_pattern ))
280280
281- if not expected_srt .exists ():
282- # Look for any .srt file created near the .sup
283- srt_files = list (sup_path .parent .glob ("*.srt" ))
284- if not srt_files :
285- raise Exception (f"pgsrip completed but no .srt file found in { sup_path .parent } " )
286- expected_srt = srt_files [0 ]
281+ if not srt_files :
282+ # Fallback: look for any .srt in the video directory
283+ srt_files = list (video_path .parent .glob (f"{ video_path .stem } .srt" ))
287284
288- self .main .thread_logging_signal .emit (
289- f"INFO:{ t ('OCR conversion successful' )} : { expected_srt .name } "
290- )
285+ if not srt_files :
286+ raise Exception (f"pgsrip completed but no .srt file found in { video_path .parent } " )
287+
288+ # Move the .srt file to the expected location (same dir as .sup was)
289+ created_srt = srt_files [0 ]
290+ expected_srt = sup_path .with_suffix (".srt" )
291+
292+ if created_srt != expected_srt :
293+ # Move/rename to expected location
294+ import shutil
295+
296+ shutil .move (str (created_srt ), str (expected_srt ))
297+
298+ self .main .thread_logging_signal .emit (f"INFO:{ t ('OCR conversion successful' )} : { expected_srt .name } " )
291299
292300 # Optionally delete the .sup file since we have .srt now
293301 try :
294302 sup_path .unlink ()
295303 self .main .thread_logging_signal .emit (f"INFO:{ t ('Removed .sup file, kept .srt' )} " )
296- except :
304+ except Exception :
297305 pass
298306
299307 return True
300308
301309 except Exception as err :
302- self .main .thread_logging_signal .emit (
303- f"ERROR:{ t ('OCR conversion failed' )} : { err } "
304- )
310+ self .main .thread_logging_signal .emit (f"ERROR:{ t ('OCR conversion failed' )} : { err } " )
305311 return False
306312
307313
0 commit comments