Skip to content

Commit 1c6c486

Browse files
committed
Address PR review feedback
- Use environment variables for Windows tool detection instead of scanning all drives (LOCALAPPDATA, PROGRAMFILES, PROGRAMFILES(X86)) - Remove pgsrip_path config field and use pgsrip Python API directly - Update dependency checks to use importlib for pgsrip library - Fix BabelLanguage to handle both 2-letter and 3-letter ISO codes - Update error messages and installation instructions All changes pass pre-commit linting checks.
1 parent 5108f53 commit 1c6c486

File tree

4 files changed

+135
-103
lines changed

4 files changed

+135
-103
lines changed

fastflix/models/config.py

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -111,22 +111,29 @@ def find_ocr_tool(name):
111111

112112
# Special handling for tesseract on Windows (not in PATH by default)
113113
if name == "tesseract" and win_based:
114-
# Check common install locations on all drives
115-
import string
116-
drives = [f"{d}:" for d in string.ascii_uppercase if Path(f"{d}:/").exists()]
117-
118-
for drive in drives:
119-
common_paths = [
120-
Path(f"{drive}/Program Files/Tesseract-OCR/tesseract.exe"),
121-
Path(f"{drive}/Program Files (x86)/Tesseract-OCR/tesseract.exe"),
122-
]
123-
for path in common_paths:
124-
if path.exists():
125-
return path
114+
# Check common install locations using environment variables
115+
localappdata = os.getenv("LOCALAPPDATA")
116+
program_files = os.getenv("PROGRAMFILES")
117+
program_files_x86 = os.getenv("PROGRAMFILES(X86)")
118+
119+
common_paths = []
120+
# Check user-local installation first
121+
if localappdata:
122+
common_paths.append(Path(localappdata) / "Programs" / "Tesseract-OCR" / "tesseract.exe")
123+
# Check system-wide installations
124+
if program_files:
125+
common_paths.append(Path(program_files) / "Tesseract-OCR" / "tesseract.exe")
126+
if program_files_x86:
127+
common_paths.append(Path(program_files_x86) / "Tesseract-OCR" / "tesseract.exe")
128+
129+
for path in common_paths:
130+
if path.exists():
131+
return path
126132

127133
# Check Windows registry for Tesseract install location
128134
try:
129135
import winreg
136+
130137
# Try HKEY_LOCAL_MACHINE first (system-wide install)
131138
for root_key in [winreg.HKEY_LOCAL_MACHINE, winreg.HKEY_CURRENT_USER]:
132139
try:
@@ -143,17 +150,24 @@ def find_ocr_tool(name):
143150

144151
# Special handling for mkvmerge on Windows
145152
if name == "mkvmerge" and win_based:
146-
import string
147-
drives = [f"{d}:" for d in string.ascii_uppercase if Path(f"{d}:/").exists()]
148-
149-
for drive in drives:
150-
common_paths = [
151-
Path(f"{drive}/Program Files/MKVToolNix/mkvmerge.exe"),
152-
Path(f"{drive}/Program Files (x86)/MKVToolNix/mkvmerge.exe"),
153-
]
154-
for path in common_paths:
155-
if path.exists():
156-
return path
153+
# Check common install locations using environment variables
154+
localappdata = os.getenv("LOCALAPPDATA")
155+
program_files = os.getenv("PROGRAMFILES")
156+
program_files_x86 = os.getenv("PROGRAMFILES(X86)")
157+
158+
common_paths = []
159+
# Check user-local installation first
160+
if localappdata:
161+
common_paths.append(Path(localappdata) / "Programs" / "MKVToolNix" / "mkvmerge.exe")
162+
# Check system-wide installations
163+
if program_files:
164+
common_paths.append(Path(program_files) / "MKVToolNix" / "mkvmerge.exe")
165+
if program_files_x86:
166+
common_paths.append(Path(program_files_x86) / "MKVToolNix" / "mkvmerge.exe")
167+
168+
for path in common_paths:
169+
if path.exists():
170+
return path
157171

158172
# Check in FastFlix OCR tools folder
159173
ocr_folder = Path(user_data_dir("FastFlix_OCR", appauthor=False, roaming=True))
@@ -243,7 +257,6 @@ class Config(BaseModel):
243257
enable_pgs_ocr: bool = False
244258
tesseract_path: Path | None = Field(default_factory=lambda: find_ocr_tool("tesseract"))
245259
mkvmerge_path: Path | None = Field(default_factory=lambda: find_ocr_tool("mkvmerge"))
246-
pgsrip_path: Path | None = Field(default_factory=lambda: find_ocr_tool("pgsrip"))
247260
pgs_ocr_language: str = "eng"
248261

249262
def encoder_opt(self, profile_name, profile_option_name):

fastflix/widgets/background_tasks.py

Lines changed: 73 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
3+
import importlib.util
34
import logging
45
import os
5-
import shutil
66
from pathlib import Path
77
from subprocess import PIPE, STDOUT, Popen, run, check_output
88
from packaging import version
@@ -190,31 +190,30 @@ def _check_pgsrip_dependencies(self) -> bool:
190190
if not self.app.fastflix.config.mkvmerge_path:
191191
missing.append("mkvtoolnix")
192192

193-
# Check pgsrip
194-
if not self.app.fastflix.config.pgsrip_path:
195-
missing.append("pgsrip")
193+
# Check if pgsrip Python library is available
194+
if importlib.util.find_spec("pgsrip") is None:
195+
missing.append("pgsrip (Python library)")
196196

197197
if missing:
198198
self.main.thread_logging_signal.emit(
199199
f"ERROR:{t('Missing dependencies for PGS OCR')}: {', '.join(missing)}\n\n"
200200
f"Install instructions:\n"
201-
f" Windows: Run setup_pgs_ocr_windows.bat in FastFlix folder\n"
202-
f" Linux: sudo apt install tesseract-ocr mkvtoolnix && pip install pgsrip\n"
203-
f" macOS: brew install tesseract mkvtoolnix && pip install pgsrip\n\n"
204-
f"Or download manually:\n"
205-
f" Tesseract: https://github.com/UB-Mannheim/tesseract/wiki\n"
206-
f" MKVToolNix: https://mkvtoolnix.download/downloads.html\n"
207-
f" pgsrip: pip install pgsrip"
201+
f" pgsrip: pip install pgsrip\n"
202+
f" Linux: sudo apt install tesseract-ocr mkvtoolnix\n"
203+
f" macOS: brew install tesseract mkvtoolnix\n"
204+
f" Windows:\n"
205+
f" - Tesseract: https://github.com/UB-Mannheim/tesseract/wiki\n"
206+
f" - MKVToolNix: https://mkvtoolnix.download/downloads.html"
208207
)
209208
return False
210209

211210
return True
212211

213212
def _convert_sup_to_srt(self, sup_filepath: str) -> bool:
214-
"""Convert an already-extracted .sup file to .srt using pgsrip OCR
213+
"""Convert PGS subtitle to .srt using pgsrip OCR by processing the original MKV
215214
216215
Args:
217-
sup_filepath: Path to the extracted .sup file
216+
sup_filepath: Path to the extracted .sup file (used for naming output)
218217
219218
Returns:
220219
True if conversion successful, False otherwise
@@ -228,80 +227,87 @@ def _convert_sup_to_srt(self, sup_filepath: str) -> bool:
228227
f"INFO:{t('Converting .sup to .srt using OCR')} (this may take 3-5 minutes)..."
229228
)
230229

231-
# Convert 3-letter language code to 2-letter for pgsrip
232-
# pgsrip uses 2-letter codes in filenames (e.g., "en" not "eng")
233-
from fastflix.language import Language
234-
try:
235-
lang_2letter = Language(self.language).pt1 # Convert eng -> en
236-
except:
237-
lang_2letter = "en" # Default to English if conversion fails
238-
239-
# Rename .sup file to use 2-letter language code (what pgsrip expects)
240-
sup_path = Path(sup_filepath)
241-
if f".{self.language}." in sup_path.name:
242-
# Replace 3-letter with 2-letter in filename
243-
new_name = sup_path.name.replace(f".{self.language}.", f".{lang_2letter}.")
244-
new_sup_path = sup_path.parent / new_name
245-
sup_path.rename(new_sup_path)
246-
sup_filepath = str(new_sup_path)
247-
248-
# Run pgsrip on the already-extracted .sup file
249-
pgsrip_cmd = str(self.app.fastflix.config.pgsrip_path) if self.app.fastflix.config.pgsrip_path else "pgsrip"
230+
# Import pgsrip Python API
231+
from pgsrip import pgsrip, Mkv, Options
232+
from babelfish import Language as BabelLanguage
250233

251234
# Set environment variables for pgsrip to find tesseract
252-
import os
253-
env = os.environ.copy()
254235
if self.app.fastflix.config.tesseract_path:
255236
# Add tesseract directory to PATH so pytesseract can find it
256237
tesseract_dir = str(Path(self.app.fastflix.config.tesseract_path).parent)
257-
env['PATH'] = f"{tesseract_dir}{os.pathsep}{env.get('PATH', '')}"
258-
env['TESSERACT_CMD'] = str(self.app.fastflix.config.tesseract_path)
238+
os.environ["PATH"] = f"{tesseract_dir}{os.pathsep}{os.environ.get('PATH', '')}"
239+
os.environ["TESSERACT_CMD"] = str(self.app.fastflix.config.tesseract_path)
259240

260-
pgsrip_result = run(
261-
[
262-
pgsrip_cmd,
263-
"--language", lang_2letter, # Use 2-letter code (e.g., "en", "es", "fr")
264-
"--force", # Overwrite existing files
265-
sup_filepath
266-
],
267-
capture_output=True,
268-
text=True,
269-
timeout=600, # 10 minute timeout for OCR
270-
env=env # Pass environment with TESSERACT_CMD
271-
)
241+
# pgsrip needs the original MKV file, not the extracted .sup
242+
# The .sup file is a raw subtitle stream, but pgsrip expects an MKV container
243+
sup_path = Path(sup_filepath)
244+
media = Mkv(self.main.input_video)
245+
246+
# Configure options for pgsrip
247+
# BabelLanguage needs different constructors for 2-letter vs 3-letter codes
248+
try:
249+
# Detect if language code is 2-letter or 3-letter
250+
if len(self.language) == 2:
251+
babel_lang = BabelLanguage.fromalpha2(self.language)
252+
elif len(self.language) == 3:
253+
babel_lang = BabelLanguage(self.language)
254+
else:
255+
# Try as language name
256+
babel_lang = BabelLanguage.fromname(self.language)
257+
258+
options = Options(
259+
languages={babel_lang},
260+
overwrite=True, # Overwrite existing .srt files
261+
one_per_lang=True, # Create one .srt per language
262+
)
263+
except Exception:
264+
# Fallback to English if language code is invalid
265+
options = Options(
266+
languages={BabelLanguage("eng")},
267+
overwrite=True,
268+
one_per_lang=True,
269+
)
272270

273-
if pgsrip_result.returncode != 0:
274-
error_msg = pgsrip_result.stderr if pgsrip_result.stderr else pgsrip_result.stdout
275-
raise Exception(f"pgsrip failed with return code {pgsrip_result.returncode}: {error_msg}")
271+
# Run pgsrip conversion using Python API on the original MKV
272+
# This will create .srt files in the same directory as the video
273+
pgsrip.rip(media, options)
276274

277-
# pgsrip creates .srt file in same directory as .sup file
278-
sup_path = Path(sup_filepath)
279-
expected_srt = sup_path.with_suffix('.srt')
275+
# Look for the created .srt file
276+
# pgsrip creates files with pattern: basename.language.srt
277+
video_path = Path(self.main.input_video)
278+
srt_pattern = f"{video_path.stem}.*.srt"
279+
srt_files = list(video_path.parent.glob(srt_pattern))
280280

281-
if not expected_srt.exists():
282-
# Look for any .srt file created near the .sup
283-
srt_files = list(sup_path.parent.glob("*.srt"))
284-
if not srt_files:
285-
raise Exception(f"pgsrip completed but no .srt file found in {sup_path.parent}")
286-
expected_srt = srt_files[0]
281+
if not srt_files:
282+
# Fallback: look for any .srt in the video directory
283+
srt_files = list(video_path.parent.glob(f"{video_path.stem}.srt"))
287284

288-
self.main.thread_logging_signal.emit(
289-
f"INFO:{t('OCR conversion successful')}: {expected_srt.name}"
290-
)
285+
if not srt_files:
286+
raise Exception(f"pgsrip completed but no .srt file found in {video_path.parent}")
287+
288+
# Move the .srt file to the expected location (same dir as .sup was)
289+
created_srt = srt_files[0]
290+
expected_srt = sup_path.with_suffix(".srt")
291+
292+
if created_srt != expected_srt:
293+
# Move/rename to expected location
294+
import shutil
295+
296+
shutil.move(str(created_srt), str(expected_srt))
297+
298+
self.main.thread_logging_signal.emit(f"INFO:{t('OCR conversion successful')}: {expected_srt.name}")
291299

292300
# Optionally delete the .sup file since we have .srt now
293301
try:
294302
sup_path.unlink()
295303
self.main.thread_logging_signal.emit(f"INFO:{t('Removed .sup file, kept .srt')}")
296-
except:
304+
except Exception:
297305
pass
298306

299307
return True
300308

301309
except Exception as err:
302-
self.main.thread_logging_signal.emit(
303-
f"ERROR:{t('OCR conversion failed')}: {err}"
304-
)
310+
self.main.thread_logging_signal.emit(f"ERROR:{t('OCR conversion failed')}: {err}")
305311
return False
306312

307313

fastflix/widgets/panels/subtitle_panel.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
3+
import importlib.util
34
from typing import Union
45

56
from box import Box
@@ -116,17 +117,23 @@ def __init__(self, app, parent, index, enabled=True, first=False):
116117
extract_menu.addAction(t("Extract as .sup (image - fast)"), lambda: self.extract(use_ocr=False))
117118

118119
# Check if OCR dependencies are available
119-
ocr_action = extract_menu.addAction(t("Convert to .srt (OCR - 3-5 min)"), lambda: self.extract(use_ocr=True))
120+
ocr_action = extract_menu.addAction(
121+
t("Convert to .srt (OCR - 3-5 min)"), lambda: self.extract(use_ocr=True)
122+
)
120123

121124
# Enable OCR option only if user enabled it AND dependencies are available
122125
if not self.app.fastflix.config.enable_pgs_ocr:
123126
ocr_action.setEnabled(False)
124127
ocr_action.setToolTip(t("Enable in Settings > 'Enable PGS to SRT OCR conversion'"))
125-
elif not (self.app.fastflix.config.tesseract_path and
126-
self.app.fastflix.config.mkvmerge_path and
127-
self.app.fastflix.config.pgsrip_path):
128-
ocr_action.setEnabled(False)
129-
ocr_action.setToolTip(t("Missing dependencies: tesseract, mkvtoolnix, or pgsrip"))
128+
else:
129+
# Check if pgsrip Python library is available
130+
pgsrip_ok = importlib.util.find_spec("pgsrip") is not None
131+
132+
if not (
133+
self.app.fastflix.config.tesseract_path and self.app.fastflix.config.mkvmerge_path and pgsrip_ok
134+
):
135+
ocr_action.setEnabled(False)
136+
ocr_action.setToolTip(t("Missing dependencies: tesseract, mkvtoolnix, or pgsrip"))
130137

131138
self.widgets.extract.setMenu(extract_menu)
132139
else:
@@ -193,8 +200,12 @@ def init_move_buttons(self):
193200

194201
def extract(self, use_ocr=False):
195202
worker = ExtractSubtitleSRT(
196-
self.parent.app, self.parent.main, self.index, self.extract_completed_signal,
197-
language=self.language, use_ocr=use_ocr
203+
self.parent.app,
204+
self.parent.main,
205+
self.index,
206+
self.extract_completed_signal,
207+
language=self.language,
208+
use_ocr=use_ocr,
198209
)
199210
worker.start()
200211
self.gif_label.show()

fastflix/widgets/settings.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3+
import importlib.util
34
import logging
45
import shutil
56
from pathlib import Path
@@ -273,8 +274,7 @@ def in_dir():
273274
self.enable_pgs_ocr = QtWidgets.QCheckBox(t("Enable PGS to SRT OCR conversion"))
274275
self.enable_pgs_ocr.setChecked(self.app.fastflix.config.enable_pgs_ocr)
275276
self.enable_pgs_ocr.setToolTip(
276-
t("Convert image-based PGS subtitles to text SRT using OCR.\n"
277-
"Typically takes 3-5 minutes per movie.")
277+
t("Convert image-based PGS subtitles to text SRT using OCR.\nTypically takes 3-5 minutes per movie.")
278278
)
279279

280280
# Dependency status
@@ -314,7 +314,9 @@ def update_ocr_dependency_status(self):
314314
# Use config paths which use find_ocr_tool() - handles non-PATH locations
315315
tesseract_ok = self.app.fastflix.config.tesseract_path is not None
316316
mkvmerge_ok = self.app.fastflix.config.mkvmerge_path is not None
317-
pgsrip_ok = self.app.fastflix.config.pgsrip_path is not None
317+
318+
# Check if pgsrip Python library is available
319+
pgsrip_ok = importlib.util.find_spec("pgsrip") is not None
318320

319321
status_parts = []
320322
status_parts.append("✓ tesseract" if tesseract_ok else "✗ tesseract")
@@ -327,7 +329,7 @@ def update_ocr_dependency_status(self):
327329
status_text += "\n" + link(
328330
"https://github.com/cdgriffith/FastFlix/wiki/PGS-OCR-Setup",
329331
"Click here for installation instructions",
330-
self.app.fastflix.config.theme
332+
self.app.fastflix.config.theme,
331333
)
332334

333335
self.ocr_status_label.setText(status_text)

0 commit comments

Comments
 (0)