11#!/usr/bin/env python
22# -*- coding: utf-8 -*-
3+ import importlib .util
34import logging
45import os
5- import shutil
66from pathlib import Path
77from subprocess import PIPE , STDOUT , Popen , run , check_output
88from packaging import version
@@ -190,21 +190,20 @@ def _check_pgsrip_dependencies(self) -> bool:
190190 if not self .app .fastflix .config .mkvmerge_path :
191191 missing .append ("mkvtoolnix" )
192192
193- # Check pgsrip
194- if not self . app . fastflix . config . pgsrip_path :
195- missing .append ("pgsrip" )
193+ # Check if pgsrip Python library is available
194+ if importlib . util . find_spec ( "pgsrip" ) is None :
195+ missing .append ("pgsrip (Python library) " )
196196
197197 if missing :
198198 self .main .thread_logging_signal .emit (
199199 f"ERROR:{ t ('Missing dependencies for PGS OCR' )} : { ', ' .join (missing )} \n \n "
200200 f"Install instructions:\n "
201- f" Windows: Run setup_pgs_ocr_windows.bat in FastFlix folder\n "
202- f" Linux: sudo apt install tesseract-ocr mkvtoolnix && pip install pgsrip\n "
203- f" macOS: brew install tesseract mkvtoolnix && pip install pgsrip\n \n "
204- f"Or download manually:\n "
205- f" Tesseract: https://github.com/UB-Mannheim/tesseract/wiki\n "
206- f" MKVToolNix: https://mkvtoolnix.download/downloads.html\n "
207- f" pgsrip: pip install pgsrip"
201+ f" pgsrip: pip install pgsrip\n "
202+ f" Linux: sudo apt install tesseract-ocr mkvtoolnix\n "
203+ f" macOS: brew install tesseract mkvtoolnix\n "
204+ f" Windows:\n "
205+ f" - Tesseract: https://github.com/UB-Mannheim/tesseract/wiki\n "
206+ f" - MKVToolNix: https://mkvtoolnix.download/downloads.html"
208207 )
209208 return False
210209
@@ -228,55 +227,40 @@ def _convert_sup_to_srt(self, sup_filepath: str) -> bool:
228227 f"INFO:{ t ('Converting .sup to .srt using OCR' )} (this may take 3-5 minutes)..."
229228 )
230229
231- # Convert 3-letter language code to 2-letter for pgsrip
232- # pgsrip uses 2-letter codes in filenames (e.g., "en" not "eng")
233- from fastflix .language import Language
234- try :
235- lang_2letter = Language (self .language ).pt1 # Convert eng -> en
236- except :
237- lang_2letter = "en" # Default to English if conversion fails
238-
239- # Rename .sup file to use 2-letter language code (what pgsrip expects)
240- sup_path = Path (sup_filepath )
241- if f".{ self .language } ." in sup_path .name :
242- # Replace 3-letter with 2-letter in filename
243- new_name = sup_path .name .replace (f".{ self .language } ." , f".{ lang_2letter } ." )
244- new_sup_path = sup_path .parent / new_name
245- sup_path .rename (new_sup_path )
246- sup_filepath = str (new_sup_path )
247-
248- # Run pgsrip on the already-extracted .sup file
249- pgsrip_cmd = str (self .app .fastflix .config .pgsrip_path ) if self .app .fastflix .config .pgsrip_path else "pgsrip"
230+ # Import pgsrip Python API
231+ from pgsrip import pgsrip , Mkv , Options
232+ from babelfish import Language as BabelLanguage
250233
251234 # Set environment variables for pgsrip to find tesseract
252- import os
253- env = os .environ .copy ()
254235 if self .app .fastflix .config .tesseract_path :
255236 # Add tesseract directory to PATH so pytesseract can find it
256237 tesseract_dir = str (Path (self .app .fastflix .config .tesseract_path ).parent )
257- env [ ' PATH' ] = f"{ tesseract_dir } { os .pathsep } { env .get ('PATH' , '' )} "
258- env [ ' TESSERACT_CMD' ] = str (self .app .fastflix .config .tesseract_path )
238+ os . environ [ " PATH" ] = f"{ tesseract_dir } { os .pathsep } { os . environ .get ('PATH' , '' )} "
239+ os . environ [ " TESSERACT_CMD" ] = str (self .app .fastflix .config .tesseract_path )
259240
260- pgsrip_result = run (
261- [
262- pgsrip_cmd ,
263- "--language" , lang_2letter , # Use 2-letter code (e.g., "en", "es", "fr")
264- "--force" , # Overwrite existing files
265- sup_filepath
266- ],
267- capture_output = True ,
268- text = True ,
269- timeout = 600 , # 10 minute timeout for OCR
270- env = env # Pass environment with TESSERACT_CMD
271- )
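241+ # Wrap the .sup path in a pgsrip Mkv media object (NOTE(review): pgsrip also provides a Sup media type for standalone .sup files - confirm Mkv is intended)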
242+ sup_path = Path (sup_filepath )
243+ media = Mkv (sup_filepath )
272244
273- if pgsrip_result .returncode != 0 :
274- error_msg = pgsrip_result .stderr if pgsrip_result .stderr else pgsrip_result .stdout
275- raise Exception (f"pgsrip failed with return code { pgsrip_result .returncode } : { error_msg } " )
245+ # Configure options for pgsrip - use 3-letter language code (e.g., "eng", "spa", "fra")
246+ # BabelLanguage expects 3-letter ISO 639-3 (alpha-3) codes
247+ try :
248+ options = Options (
249+ languages = {BabelLanguage (self .language )}, # Use 3-letter code directly
250+ overwrite = True , # Overwrite existing .srt files
251+ )
252+ except Exception :
253+ # Fallback to English if language code is invalid
254+ options = Options (
255+ languages = {BabelLanguage ("eng" )},
256+ overwrite = True ,
257+ )
276258
277- # pgsrip creates .srt file in same directory as .sup file
278- sup_path = Path (sup_filepath )
279- expected_srt = sup_path .with_suffix ('.srt' )
259+ # Run pgsrip conversion using Python API
260+ pgsrip .rip (media , options )
261+
262+ # Check if .srt file was created
263+ expected_srt = sup_path .with_suffix (".srt" )
280264
281265 if not expected_srt .exists ():
282266 # Look for any .srt file created near the .sup
@@ -285,23 +269,19 @@ def _convert_sup_to_srt(self, sup_filepath: str) -> bool:
285269 raise Exception (f"pgsrip completed but no .srt file found in { sup_path .parent } " )
286270 expected_srt = srt_files [0 ]
287271
288- self .main .thread_logging_signal .emit (
289- f"INFO:{ t ('OCR conversion successful' )} : { expected_srt .name } "
290- )
272+ self .main .thread_logging_signal .emit (f"INFO:{ t ('OCR conversion successful' )} : { expected_srt .name } " )
291273
292275 # Best-effort cleanup: remove the .sup file now that the .srt exists (failure is ignored)
293275 try :
294276 sup_path .unlink ()
295277 self .main .thread_logging_signal .emit (f"INFO:{ t ('Removed .sup file, kept .srt' )} " )
296- except :
278+ except Exception :
297279 pass
298280
299281 return True
300282
301283 except Exception as err :
302- self .main .thread_logging_signal .emit (
303- f"ERROR:{ t ('OCR conversion failed' )} : { err } "
304- )
284+ self .main .thread_logging_signal .emit (f"ERROR:{ t ('OCR conversion failed' )} : { err } " )
305285 return False
306286
307287
0 commit comments