Skip to content

Commit 5383d3c

Browse files
FEAT: Add removal of descriptive subtitle lines to the Subtitles Translations with DeepL Python project
1 parent db2fa51 commit 5383d3c

File tree

1 file changed

+38
-0
lines changed
  • Subtitles Translations with DeepL

1 file changed

+38
-0
lines changed

Subtitles Translations with DeepL/main.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ class BackgroundColors: # Colors for the terminal
7777

7878
# Execution Constants:
VERBOSE = False # Set to True to output verbose messages
DESCRIPTIVE_SUBTITLES_REMOVAL = True # Set to True to remove descriptive lines (e.g., [music], (laughs)) from SRT before translation
DEEPL_API_KEY = "" # DeepL API key (will be loaded in load_dotenv function)
INPUT_DIR = "./Input" # Directory containing the input SRT files (plain string; no f-string needed as nothing is interpolated)
OUTPUT_DIR = Path("./Output") # Base output directory
@@ -179,6 +180,40 @@ def read_srt(file_path):
179180
with open(file_path, "r", encoding="utf-8") as f: # Open the SRT file for reading
180181
return f.readlines() # Read all lines and return as a list
181182

183+
def _is_descriptive_line(text):
    """
    Tells whether a subtitle text line consists only of bracketed or parenthesized annotations.

    A line such as "[music]" or "(laughs) [applause]" is descriptive.
    A line such as "[John] Hi there [sighs]" is not, because it has spoken text outside the groups.

    :param text: Subtitle text line
    :return: True if the line holds nothing but "[...]" and/or "(...)" groups, False otherwise
    """

    pairs = {"[": "]", "(": ")"} # Opening delimiter -> matching closing delimiter
    opener = closer = None # Delimiters of the group currently being scanned
    depth = 0 # Nesting depth inside the current group (0 means outside any group)
    groups = 0 # Number of groups opened so far

    for char in text: # Scan the line character by character
        if depth == 0: # Currently outside any group
            if char.isspace(): # Whitespace between groups is allowed
                continue # Keep scanning
            if char not in pairs: # Any other character outside a group is spoken text
                return False # Not a purely descriptive line
            opener, closer = char, pairs[char] # Remember which delimiters close this group
            depth = 1 # Enter the group
            groups += 1 # Count the group
        elif char == opener: # Same kind of delimiter nested inside the group
            depth += 1 # Go one level deeper
        elif char == closer: # Matching closing delimiter
            depth -= 1 # Leave one level

    return groups > 0 and depth == 0 # At least one group, and every group was closed


def remove_descriptive_subtitles(file_path):
    """
    Removes descriptive lines from the SRT file, such as text within brackets or parentheses.
    Only lines made up entirely of "[...]" / "(...)" groups are removed, so a line that
    also carries spoken text (e.g., "[John] Hi there [sighs]") is kept.
    Overwrites the original SRT file with cleaned lines.
    These cleaned lines are used for translation.

    :param file_path: Path to the SRT file
    :return: List of cleaned lines (without trailing newline characters)
    """

    verbose_output(f"{BackgroundColors.GREEN}Removing descriptive subtitles from: {BackgroundColors.CYAN}{file_path}{Style.RESET_ALL}") # Verbose message

    cleaned_lines = [] # Store cleaned lines

    with open(file_path, "r", encoding="utf-8") as f: # Open SRT for reading
        for line in f: # Iterate through each line
            stripped = line.strip() # Remove leading/trailing whitespace

            if stripped == "" or stripped.replace(":", "").replace(",", "").isdigit() or "-->" in line: # If line is empty, timing, or index
                cleaned_lines.append(line.rstrip("\n")) # Keep timing/index/empty lines as is
                continue # Skip further checks

            if _is_descriptive_line(stripped): # If line holds only bracketed/parenthesized annotations
                continue # Skip descriptive lines

            cleaned_lines.append(stripped) # Keep normal text lines

    # NOTE: index and timing lines are always kept, so a cue whose text was entirely
    # descriptive stays in the file as an index/timing pair with no text.
    with open(file_path, "w", encoding="utf-8") as f: # Open SRT for writing
        f.write("\n".join(cleaned_lines)) # Overwrite SRT with cleaned lines

    return cleaned_lines # Return cleaned lines for translation
216+
182217
def get_remaining_characters(translator):
183218
"""
184219
Checks remaining characters available in DeepL free API plan.
@@ -338,6 +373,9 @@ def main():
338373

339374
srt_lines = read_srt(srt_file) # Read SRT
340375

376+
if DESCRIPTIVE_SUBTITLES_REMOVAL: # Remove descriptive subtitles if enabled
377+
srt_lines = remove_descriptive_subtitles(srt_file) # Clean SRT lines
378+
341379
translated_lines = translate_srt_lines(srt_lines) # Translate
342380

343381
relative_path = srt_file.relative_to(INPUT_DIR).parent # Get relative path

0 commit comments

Comments
 (0)