Skip to content

Commit 4e45046

Browse files
committed
Invert subtitle image before the OCR
Inverting the image makes the OCR work much better in my testing, especially for subtitles that span multiple lines.
1 parent 089ef65 commit 4e45046

File tree

3 files changed

+18
-1
lines changed

3 files changed

+18
-1
lines changed

doc/completion.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ _vobsub2srt() {
4444

4545
case $cur in
4646
-*)
47-
COMPREPLY=( $( compgen -W '--dump-images --verbose --ifo --lang --langlist --tesseract-lang --tesseract-data --blacklist --y-threshold --min-width --min-height --max-threads' -- "$cur" ) )
47+
COMPREPLY=( $( compgen -W '--dump-images --verbose --ifo --lang --langlist --tesseract-lang --tesseract-data --blacklist --y-threshold --min-width --min-height --max-threads --disable-invert' -- "$cur" ) )
4848
;;
4949
*)
5050
_filedir '(idx|IDX|sub|SUB)'

doc/vobsub2srt.1

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ Minimum height in pixels to consider a subpicture for OCR (Default: 1).
4949
.TP
5050
\fB\-\-max\-threads\fR \fInb\fR
5151
Maximum number of threads to use to do the OCR, use 0 to autodetect the number of cores (Default: 0).
52+
\fB\-\-disable\-invert\fR
53+
Do not invert the image before the OCR. By default the image is inverted, because the OCR works better with black text on a white background.
5254
.SH EXAMPLES
5355
.nf
5456
$ \fBvobsub2srt \-\-lang en foobar\fR

src/vobsub2srt.c++

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,16 @@ void do_ocr(TessBaseAPI *tess_base_api, atomic<bool> *done, vector<sub_text_t> *
142142
done->store(true);
143143
}
144144

145+
// Invert every pixel of an 8-bit, single-channel image in place
// (each byte v becomes 255 - v).
//
//   width   number of pixels inverted in each row
//   height  number of rows
//   stride  distance in bytes between the start of two consecutive rows;
//           bytes between `width` and `stride` in a row are left untouched
//   image   pixel buffer, modified in place; a null pointer is ignored
//
// Rows are walked with a running pointer instead of computing
// `y*stride + x` in `unsigned` arithmetic, which would wrap around once
// the offset no longer fits in an unsigned int.
void invert_image(unsigned width, unsigned height,
                  unsigned stride, unsigned char *image) {
  if (!image) {
    return;
  }

  unsigned char *row = image;
  for (unsigned y = 0; y < height; y++) {
    for (unsigned x = 0; x < width; x++) {
      row[x] = (unsigned char)(255 - row[x]);
    }
    // Only step to the next row when there is one, so the pointer never
    // moves past the end of a buffer whose last row carries no padding.
    if (y + 1 < height) {
      row += stride;
    }
  }
}
154+
145155
struct ocr_thread_t {
146156
ocr_thread_t(TessBaseAPI *tess_base_api)
147157
: tess_base_api(tess_base_api)
@@ -153,6 +163,7 @@ struct ocr_thread_t {
153163

154164
int main(int argc, char **argv) {
155165
bool dump_images = false;
166+
bool disable_invert = false;
156167
bool verb = false;
157168
bool list_languages = false;
158169
std::string ifo_file;
@@ -186,6 +197,7 @@ int main(int argc, char **argv) {
186197
add_option("min-width", min_width, "Minimum width in pixels to consider a subpicture for OCR (Default: 9)").
187198
add_option("min-height", min_height, "Minimum height in pixels to consider a subpicture for OCR (Default: 1)").
188199
add_option("max-threads", max_threads, "Maximum number of threads to use to do the OCR, use 0 to autodetect the number of cores (Default: 0)").
200+
add_option("disable-invert", disable_invert, "By default the image will be inverted before the OCR because it works better with black on white background, disable it").
189201
add_unnamed(subname, "subname", "name of the subtitle files WITHOUT .idx/.sub ending! (REQUIRED)");
190202
if(not opts.parse_cmd(argc, argv) or subname.empty()) {
191203
return 1;
@@ -344,6 +356,9 @@ int main(int argc, char **argv) {
344356
unsigned char *image_cpy = (unsigned char *)malloc(image_size);
345357
memcpy(image_cpy, image, image_size);
346358

359+
if (!disable_invert)
360+
invert_image(width, height, stride, image_cpy);
361+
347362
if (max_threads == 1)
348363
do_ocr(ocr_thread->tess_base_api, &ocr_thread->done, &conv_subs, &mut, sub_counter, width, height, stride, image_cpy, start_pts, end_pts, verb);
349364
else {

0 commit comments

Comments
 (0)