1010import re
1111from pathlib import Path
1212
13+
1314version = "2025-02-03"
1415
1516
@@ -64,54 +65,88 @@ def missing_modules_installer(required_modules):
6465
6566
class file_handler:
    """Hold the input file and the paths derived from it.

    The instance starts empty; `set_file` and `set_over` fill it in, so one
    shared instance can be pointed at several files in turn.
    """

    def __init__(self):
        self.i = None  # Input file
        self.o = None  # Output file
        self.c = None  # Copy file
        self.overw = None  # Overwrite

    def set_file(self, i):
        """Select ``i`` as the input file and derive the output/copy paths.

        Args:
            i: Path (or string) of the subtitle file to read.

        Raises:
            FileNotFoundError: If ``i`` does not point at an existing file.
        """
        i = Path(i)
        if not i.is_file():
            raise FileNotFoundError(f"File '{i} ' not found.")
        self.i = i
        self.o = i.with_suffix(".txt")  # Same name, .txt extension
        self.c = i.with_stem(f"{i.stem} -copy")  # Same extension, suffixed stem
        print(f"Input file: {i} ")

    def set_over(self, x):
        """Store whether an existing output file may be replaced without asking."""
        self.overw = x
7586
7687
class encoding:
    """Detect the input file's character encoding and choose the output one."""

    def __init__(self):
        self.res = None  # Check encoding
        self.enc = None  # Detected encoding
        self.out = None  # Output encoding

    def check_encoding(self):
        """Guess the encoding of the current input file (``file.i``).

        Raises:
            ValueError: If no encoding could be determined for the file.
        """
        self.res = from_path(file.i).best()  # charset_normalizer guess encoding
        # best() yields None when nothing plausible was found; check that
        # before touching any attribute of the result.
        if self.res is None:
            self.enc = None
            raise ValueError(f"Unable to detect the character encoding of '{file.i}'.")
        self.enc = self.res.encoding
        if self.enc == "utf_8" and self.res.bom:
            self.enc += "_sig"  # adds sig for utf_8_sig/bom files
        print(f"Detected Character Encoding: {self.enc} ")
        print(f"Confidence of encoding: {int((1.0 - self.res.chaos) * 100)} %")

    def force_utf8(self, x):
        """Choose the output encoding: UTF-8 when ``x`` is true, else the input's."""
        print("Output encoding forced to UTF-8" if x else "Output will use input encoding")
        self.out = "utf_8" if x else self.enc
87105
88106
class subtitle:
    """Per-run subtitle state: detected format, collected text and options."""

    def __init__(self):
        self.format = None  # Which subtitle format
        self.text = ""  # The output text
        self.text_finished = ""  # The output text after a final check
        self.prev = ""  # Previously read line, prevents duplicates
        self.no_names = False  # If True removes names from subtitles
        self.nosrt = False  # If True leaves subs in file order, not timecode order
        self.scr = False  # If True outputs to screen as each line processed
        self.oneline = False  # If True attempts to join longer lines
        # Junk remover list; built here so it is usable even when
        # set_no_names() is never called, and rebuilt by set_no_names().
        self.junk = self.junklist()

    @staticmethod
    def _detect_format(lines):
        """Return "vtt", "srt" or "ass" for the first recognisable line, else None."""
        lines = iter(lines)
        for line in lines:
            if "WEBVTT" in line:
                return "vtt"
            # A lone "1" followed by a timecode line marks SubRip. The default
            # for next() avoids StopIteration when "1" is the last line.
            if line.strip("\n ") == "1" and re.search("(.*:.*:.*-->.*:.*:.*)", next(lines, "")):
                return "srt"
            if any(s in line for s in ["!:", "Timer:", "Style:", "Comment:", "Dialogue:", "ScriptType:"]):
                return "ass"
        return None

    def testsub(self):
        """Detect the format of the current input file and store it in ``self.format``.

        Detection stops at the first match, so later lines cannot overwrite
        it. ``self.format`` is None when nothing matched, which also clears
        any result left over from a previous file.
        """
        with open(file.i, "r", encoding=enc.enc) as ts:
            self.format = self._detect_format(ts)

    def junklist(self):
        """Build the list of regex patterns stripped from every subtitle line."""
        # This list will grow
        # Escaping and r(raw) tag needed for special characters
        j = ["<.*?>", r"\{.*?\}", r"\[.*\]", r"\(.*\)", r"^-\s"]
        if self.no_names:
            j.append("^.*?:")
        return j

    def set_no_names(self, x):
        """Enable/disable name removal and rebuild the junk list to match."""
        self.no_names = x
        self.junk = self.junklist()

    def set_no_sort(self, x):
        """Keep subtitles in file order (True) instead of timecode order."""
        self.nosrt = x

    def screen_output(self, x):
        """Enable/disable echoing each processed line to the screen."""
        self.scr = x

    def one_line(self, x):
        """Enable/disable joining of longer lines."""
        self.oneline = x
149+
115150
def cls():  # Clear screen win/*nix friendly
    """Clear the terminal using the command that suits the current OS."""
    command = "cls" if os.name == "nt" else "clear"
    os.system(command)
@@ -214,20 +249,28 @@ def arguments():
214249 required = False ,
215250 help = "For SubStation Alpha (.ssa/.ass), do not sort by timecode." ,
216251 )
252+ parser .add_argument (
253+ "--debug" ,
254+ "-db" ,
255+ default = False ,
256+ action = "store_true" ,
257+ required = False ,
258+ help = "Give Traceback output if the script fails" ,
259+ )
217260 return parser .parse_args ()
218261
219262
def overwrite_old_file(f):
    """Make room for a new output file at path ``f``.

    Does nothing when ``f`` does not exist. Otherwise the old file is sent
    to the trash if overwriting was requested (``file.overw``) or the user
    confirms at the prompt.

    Raises:
        Exception: If the file exists and the user declines to replace it.
    """
    if not f.is_file():
        return
    # The prompt is only shown when overwriting was not requested up front.
    if file.overw or yn("Output file already exists, delete and make a new one?"):
        print("Overwriting old file")
        send2trash(f)
        return
    raise Exception("Output file already exists.")
227270
228271
229272def copy ():
230- overwrite (file .c )
273+ overwrite_old_file (file .c )
231274 with open (file .i , "r" , encoding = enc .enc ) as original , open (file .c , "w" , encoding = enc .out ) as new :
232275 for line in original :
233276 if args .screen :
@@ -237,7 +280,7 @@ def copy():
237280
238281
239282def junk_strip (line ):
240- # Based on PR#4 by eMPee584
283+ # Based on PR #4 by eMPee584
241284 # Looping is terrible, but, a required evil it seems
242285 for junk in sub .junk :
243286 try :
@@ -250,13 +293,13 @@ def junk_strip(line):
250293def process_line (line ):
251294 # Strip formatting junk from line
252295 # We do this before checking for duplicates
253- line = junk_strip (line )
296+ line = junk_strip (line ). strip ()
254297 # Process line if it's not a duplicate of the previous one, or empty.
255- # Based on PR#4 by eMPee584
256- line = line . strip ()
257- if not line == sub . prev and line != "" :
258- # One liners based on PR#2 by adam-sierakowski
259- if args . oneliners :
298+ # Based on PR #4 by eMPee584
299+ # Fix for live translations giving duplicates from Issue #9 by rajibando
300+ if line . strip () and line . strip () != sub . prev . strip () :
301+ # One liners based on PR #2 by adam-sierakowski
302+ if sub . oneline :
260303 if line [- 1 ] in ["." , "?" , "!" , "…" ]:
261304 ln = f"{ line } \n "
262305 sub .text += ln
@@ -267,7 +310,7 @@ def process_line(line):
267310 ln = f"{ line } \n "
268311 sub .text += ln
269312
270- if args . screen :
313+ if sub . scr :
271314 print (ln , end = "" )
272315 sub .prev = ln
273316
@@ -316,7 +359,7 @@ def do_ass():
316359 # https://wiki.multimedia.cx/index.php?title=SubStation_Alpha
317360 # http://www.tcax.org/docs/ass-specs.htm Browser may complain as not https site.
318361 # This format has different version, later ones include more metadata and sections,
319- # this should not be a big problem as teh text is always on a `Dialog:` line.
362+ # this should not be a big problem as the text is always on a `Dialog:` line.
320363 # Two keys issues are; lines may not be in timecode order,
321364 # text may be for labelling things and not part of the script.
322365 print ("Processing file as SubStation Alpha subtitle [.ssa/.ass]" )
@@ -342,9 +385,9 @@ def do_ass():
342385 stc = x [0 ][0 ] # Start timecode
343386 nom = x [0 ][1 ] # Character speaking
344387 txt = x [0 ][2 ] # Text
345- text = txt if (args . nonames or nom == "" ) else f"{ nom } : { txt } "
388+ text = txt if (sub . no_names or nom == "" ) else f"{ nom } : { txt } "
346389 d .update ({stc : {"dialog" : text }})
347- for t in [v ["dialog" ] for k , v in sorted (d .items ())] if not args . nosort else [v ["dialog" ] for v in d .values ()]:
390+ for t in [v ["dialog" ] for k , v in sorted (d .items ())] if not sub . nosrt else [v ["dialog" ] for v in d .values ()]:
348391 process_line (t .replace (r"\n" , " " ).replace (r"\N" , " " )) # Fixes odd newline in .ass
349392 write_to_file ()
350393
@@ -358,7 +401,7 @@ def write_to_file():
358401
359402
360403def do_work ():
361- overwrite (file .o )
404+ overwrite_old_file (file .o )
362405 if sub .format == "srt" :
363406 do_srt ()
364407 elif sub .format == "vtt" :
@@ -369,35 +412,68 @@ def do_work():
369412 raise Exception ("Unable to determine Subtitle format." )
370413
371414
def check_it_works(in_file):  # Pytest runner
    """Run one full conversion from a dict of test settings.

    ``in_file`` supplies the keys "test_file", "test_outf", "test_force",
    "test_names", "test_sort" and "test_onel".

    Returns:
        None on success, otherwise a "Testing failed: ..." message string.
    """
    try:
        file.set_file(in_file["test_file"])
        file.o = Path(in_file["test_outf"])  # Override normal output file
        file.set_over(True)  # Always overwrite (although unlikely when Pytesting)
        enc.check_encoding()
        enc.force_utf8(in_file["test_force"])  # True/False
        sub.set_no_names(in_file["test_names"])  # True/False
        sub.set_no_sort(in_file["test_sort"])
        sub.screen_output(False)  # Pytest never needs to output to screen
        sub.one_line(in_file["test_onel"])
        sub.testsub()  # Needs the detected encoding, so runs after check_encoding
        do_work()
    except Exception as exc:
        return f"Testing failed: {exc} "
    return None
431+
432+
# Init classes
# Shared, module-level state objects used by the functions in this file.
file = file_handler()
enc = encoding()
sub = subtitle()

# Do things
if __name__ == "__main__":
    args = arguments()
    cls()
    try:
        print(f"SUB to TXT v{version} \n {'-' * 22} ")
        # These options do not depend on the input file, so apply them once
        # up front; that way directory mode gets them too.
        file.set_over(args.overwrite)
        sub.set_no_names(args.nonames)  # True/False
        sub.set_no_sort(args.nosort)  # True/False
        sub.screen_output(args.screen)  # True/False
        sub.one_line(args.oneliners)  # True/False
        if args.file or args.copy:
            file.set_file(args.file)
            enc.check_encoding()
            enc.force_utf8(args.utf8)  # True/False
            sub.testsub()
            if args.pause and not yn("Ready to start?"):
                raise Exception("User exited at pause before start")
            if args.copy:
                copy()
            else:
                do_work()
        if args.dir:
            files = [p for p in Path(args.dir).glob("*") if p.suffix in {".srt", ".vtt", ".ssa", ".ass"}]
            how_many = len(files)
            print(f"Multi file mode. Found {how_many} files.")
            print("-" * 22)
            for c, f in enumerate(files, start=1):
                file.set_file(f)
                # Encoding and format must be detected per file.
                enc.check_encoding()
                enc.force_utf8(args.utf8)
                # The subtitle object is shared, so clear what the previous
                # file left behind before detecting and converting this one.
                sub.format = None
                sub.text = ""
                sub.text_finished = ""
                sub.prev = ""
                sub.testsub()
                do_work()
                print("-" * 22)
                print(f"Processed {c} /{how_many} files.")
        print("\n Finished!\n ")
    except Exception as error:
        print(f"Script execution stopped because:\n {error} ")
        if args.debug:
            import traceback

            print(traceback.format_exc())
0 commit comments