Skip to content

Commit 0008430

Browse files
committed
will flake8 like me now?
1 parent ef5a22d commit 0008430

File tree

3 files changed

+78
-27
lines changed

3 files changed

+78
-27
lines changed

.github/workflows/pytest.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ jobs:
88
runs-on: ubuntu-latest
99
strategy:
1010
matrix:
11-
python-version: ["3.6", "3.7", "3.8", "3.9","3.10","3.11","3.12","3.13"]
11+
python-version: ["3.9","3.10","3.11","3.12","3.13"]
1212

1313
steps:
1414
- uses: szenius/set-timezone@v1.2
@@ -24,5 +24,5 @@ jobs:
2424
- uses: pavelzw/pytest-action@v2
2525
with:
2626
emoji: false
27-
verbose: false
27+
verbose: true
2828
job-summary: true

subtotxt.py

Lines changed: 75 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
"""Subtitle to plain Text converter: Handles .srt, .vtt, .ssa, .ass files."""
2+
13
# cSpell:disable
2-
# SRT or WEBVTT to plain Text
4+
# SRT, ASS/SSA or WEBVTT to plain Text
35
# Author: NebularNerd
46
# Version: 2025-02-03
57
# https://github.com/NebularNerd/subtotxt
@@ -15,6 +17,7 @@
1517

1618

1719
def missing_modules_installer(required_modules):
20+
"""Auto module installer, fairly clever, will run if it finds modules are missing."""
1821
import platform
1922

2023
if float(platform.python_version().rsplit(".", 1)[0].strip()) < 3.12: # pkg_resources method
@@ -65,13 +68,17 @@ def missing_modules_installer(required_modules):
6568

6669

6770
class file_handler:
71+
"""Get the file ready for action"""
72+
6873
def __init__(self):
74+
"""Variables have the following purposes."""
6975
self.i = None # Input file
7076
self.o = None # Output file
7177
self.c = None # Copy file
7278
self.overw = None # Overwrite
7379

7480
def set_file(self, i):
81+
"""Set file input, then create output names."""
7582
i = Path(i)
7683
if i.is_file():
7784
self.i = i
@@ -82,30 +89,39 @@ def set_file(self, i):
8289
raise FileNotFoundError(f"File '{i}' not found.")
8390

8491
def set_over(self, x):
92+
"""Overwrite existing output file without asking."""
8593
self.overw = x
8694

8795

8896
class encoding:
97+
"""Figure out what encoding the subtitle has, override output encoding if desired."""
98+
8999
def __init__(self):
100+
"""Variables have the following purposes."""
90101
self.res = None # Check encoding
91102
self.enc = None # Detected encoding
92103
self.out = None # Output encoding
93104

94105
def check_encoding(self):
95-
self.res = from_path(file.i).best() # charset_normalizer guess encoding
106+
"""charset_normalizer guess encoding."""
107+
self.res = from_path(file.i).best()
96108
self.enc = self.res.encoding
97109
if self.res is not None and self.enc == "utf_8" and self.res.bom:
98110
self.enc += "_sig" # adds sig for utf_8_sig/bom files
99111
print(f"Detected Character Encoding: {self.enc}")
100112
print(f"Confidence of encoding: {int((1.0 - self.res.chaos) * 100)}%")
101113

102114
def force_utf8(self, x):
115+
"""Force UTF8 output regardless of input encoding."""
103116
print("Output encoding forced to UTF-8" if x else "Output will use input encoding")
104117
self.out = "utf_8" if x else self.enc
105118

106119

107120
class subtitle:
121+
"""Wrangle and mangle the file into nice readable text."""
122+
108123
def __init__(self):
124+
"""Variables have the following purposes."""
109125
self.format = None # Which subtitle format
110126
self.text = "" # The output text
111127
self.text_finished = "" # The output text after a final check
@@ -117,6 +133,17 @@ def __init__(self):
117133
self.oneline = False # If True attempts to join longer lines
118134

119135
def testsub(self):
136+
"""
137+
Opens subtitle file and attempts to detect encoding used.
138+
139+
Notes:
140+
A file may appear as `UTF8` in some programs but be detected as `ascii` here,
141+
this is not a bug. `ascii` just means there are no characters in the file beyond the
142+
standard character set.
143+
144+
Chinese and near neighbours/dialects have many many encodings, sometimes the wrong one may
145+
be chosen but it should not affect output.
146+
"""
120147
with open(file.i, "r", encoding=enc.enc) as ts:
121148
for line in ts:
122149
if "WEBVTT" in line:
@@ -127,32 +154,42 @@ def testsub(self):
127154
self.format = "ass"
128155

129156
def junklist(self):
130-
# This list will grow
131-
# Escaping and r(raw) tag needed for special characters
157+
"""
158+
List of junk strings, characters, control codes we wish to remove.
159+
160+
This list will grow/adapt over time.
161+
Escaping and r(raw) tag needed for special characters
162+
"""
132163
j = ["<.*?>", r"\{.*?\}", r"\[.*\]", r"\(.*\)", r"^-\s"]
133164
if self.no_names:
134165
j.append("^.*?:")
135166
return j
136167

137168
def set_no_names(self, x):
169+
"""If True: Strip names from lines, e.g.: `Blackadder: Your name is Bob?`."""
138170
self.no_names = x
139171
self.junk = self.junklist()
140172

141173
def set_no_sort(self, x):
174+
"""If True: Prevents .ass/.ssa subs from being sorted by timecode"""
142175
self.nosrt = x
143176

144177
def screen_output(self, x):
178+
"""If True: Outputs processed content to screen/console."""
145179
self.scr = x
146180

147181
def one_line(self, x):
182+
"""If True: Sets one line function, attempts to join split sentences."""
148183
self.oneline = x
149184

150185

151-
def cls(): # Clear screen win/*nix friendly
186+
def cls():
187+
"""Clear screen win/*nix friendly."""
152188
os.system("cls" if os.name == "nt" else "clear")
153189

154190

155-
def yn(yn): # Simple Y/N selector, use yn(text_for_choice)
191+
def yn(yn):
192+
"""Simple Y/N selector, use yn(text_for_choice)."""
156193
while True:
157194
print(f"{yn} [Y/N]")
158195
choice = input().lower()
@@ -165,6 +202,7 @@ def yn(yn): # Simple Y/N selector, use yn(text_for_choice)
165202

166203

167204
def arguments():
205+
"""Everyone loves arguments, here's a list of them."""
168206
parser = argparse.ArgumentParser(
169207
formatter_class=argparse.RawDescriptionHelpFormatter,
170208
description="Quickly convert SRT, SSA or WEBVTT subtitles into plain text file.",
@@ -261,6 +299,7 @@ def arguments():
261299

262300

263301
def overwrite_old_file(f):
302+
"""Politely check if there is an existing file before moving forward."""
264303
if f.is_file():
265304
if (not file.overw and yn("Output file already exists, delete and make a new one?")) or file.overw:
266305
print("Overwriting old file")
@@ -270,6 +309,7 @@ def overwrite_old_file(f):
270309

271310

272311
def copy():
312+
"""This just copies a file line by line, handy for checking encoding issues without processing the file."""
273313
overwrite_old_file(file.c)
274314
with open(file.i, "r", encoding=enc.enc) as original, open(file.c, "w", encoding=enc.out) as new:
275315
for line in original:
@@ -280,8 +320,7 @@ def copy():
280320

281321

282322
def junk_strip(line):
283-
# Based on PR #4 by eMPee584
284-
# Looping is terrible, but, a required evil it seems
323+
"""Based on PR #4 by eMPee584. Looping is terrible, but, a required evil it seems."""
285324
for junk in sub.junk:
286325
try:
287326
line = re.sub(rf"{junk}", "", line)
@@ -291,6 +330,7 @@ def junk_strip(line):
291330

292331

293332
def process_line(line):
333+
"""Process each line, remove formatting junk, check for duplicates, store for writing later."""
294334
# Strip formatting junk from line
295335
# We do this before checking for duplicates
296336
line = junk_strip(line).strip()
@@ -316,9 +356,11 @@ def process_line(line):
316356

317357

318358
def do_srt():
319-
# SubRip subtitle file .srt
320-
# https://en.wikipedia.org/wiki/SubRip
321-
# Format has a line number followed by a timecode on the next line, then text.
359+
"""
360+
SubRip subtitle file .srt format.
361+
https://en.wikipedia.org/wiki/SubRip
362+
Format has a line number followed by a timecode on the next line, then text.
363+
"""
322364
print("Processing file as SubRip subtitles [.srt]")
323365
with open(file.i, "r", encoding=enc.enc) as original:
324366
subnum = 1
@@ -331,12 +373,15 @@ def do_srt():
331373

332374

333375
def do_vtt():
334-
# WebVTT (Web Video Text Tracks) subtitle file .vtt
335-
# https://en.wikipedia.org/wiki/WebVTT
336-
# https://www.checksub.com/blog/guide-use-webvtt-subtitles-format
337-
# This format has a few differing 'standards', you have:
338-
# Metadata, notes, styles, timceodes with optional hours, and optional line numbers,
339-
# almost none of which are actually used it seems. But we need to handle them
376+
"""
377+
WebVTT (Web Video Text Tracks) subtitle file .vtt format.
378+
379+
https://en.wikipedia.org/wiki/WebVTT
380+
https://www.checksub.com/blog/guide-use-webvtt-subtitles-format
381+
This format has a few differing `standards`, you have:
382+
Metadata, notes, styles, timecodes with optional hours, and optional line numbers,
383+
almost none of which are actually used it seems. But we need to handle them.
384+
"""
340385
print("Processing file as WebVTT (Web Video Text Tracks) [.vtt]")
341386
with open(file.i, "r", encoding=enc.enc) as original:
342387
subnum = 1
@@ -355,13 +400,16 @@ def do_vtt():
355400

356401

357402
def do_ass():
358-
# SubStation Alpha subtitle file .ssa/.ass
359-
# https://wiki.multimedia.cx/index.php?title=SubStation_Alpha
360-
# http://www.tcax.org/docs/ass-specs.htm Browser may complain as not https site.
361-
# This format has different version, later ones include more metadata and sections,
362-
# this should not be a big problem as the text is always on a `Dialog:` line.
363-
# Two keys issues are; lines may not be in timecode order,
364-
# text may be for labelling things and not part of the script.
403+
"""
404+
SubStation Alpha subtitle file .ssa/.ass format.
405+
406+
https://wiki.multimedia.cx/index.php?title=SubStation_Alpha
407+
http://www.tcax.org/docs/ass-specs.htm Browser may complain as not https site.
408+
This format has different versions, later ones include more metadata and sections,
409+
this should not be a big problem as the text is always on a `Dialogue:` line.
410+
Two key issues are: lines may not be in timecode order,
411+
text may be for labelling objects and not part of the script.
412+
"""
365413
print("Processing file as SubStation Alpha subtitle [.ssa/.ass]")
366414
with open(file.i, "r", encoding=enc.enc) as original:
367415
# Try and get version
@@ -393,6 +441,7 @@ def do_ass():
393441

394442

395443
def write_to_file():
444+
"""Outputs finished work to a new file in the selected encoding."""
396445
with open(file.o, "w", encoding=enc.out) as new:
397446
# We check for junk again because it can get split over two lines and we can't find it until now.
398447
for line in sub.text.splitlines():
@@ -401,6 +450,7 @@ def write_to_file():
401450

402451

403452
def do_work():
453+
"""Process file based on sub.format, additionally check if there is a file from a previous run."""
404454
overwrite_old_file(file.o)
405455
if sub.format == "srt":
406456
do_srt()
@@ -413,6 +463,7 @@ def do_work():
413463

414464

415465
def check_it_works(in_file): # Pytest runner
466+
"""This is for running Pytests, as we need to be able to set various variables."""
416467
try:
417468
file.set_file(in_file["test_file"])
418469
file.o = Path(in_file["test_outf"]) # Override normal output file

tests/test_ass.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
""" Test SubStation Alpha Subtitles. """
1+
"""Test SubStation Alpha Subtitles."""
22

33
# cSpell: disable
44
# Test SubStation Alpha Subtitles 4.00+

0 commit comments

Comments
 (0)