@@ -281,6 +281,13 @@ void *init_ocr(int lang_index)
281281 // set PSM mode
282282 TessBaseAPISetPageSegMode (ctx -> api , ccx_options .psm );
283283
284+ // Set character blacklist to prevent common OCR errors (e.g. | vs I)
285+ // These characters are rarely used in subtitles but often misrecognized
286+ if (ccx_options .ocr_blacklist )
287+ {
288+ TessBaseAPISetVariable (ctx -> api , "tessedit_char_blacklist" , "|\\`_~" );
289+ }
290+
284291 free (pars_vec );
285292 free (pars_values );
286293
@@ -351,6 +358,176 @@ BOX *ignore_alpha_at_edge(png_byte *alpha, unsigned char *indata, int w, int h,
351358 return cropWindow ;
352359}
353360
/**
 * Vertical extent of one detected text line inside a bitmap.
 *
 * Both fields are row indices into the source image, and both ends of the
 * range are inclusive, so a line covers (end_y - start_y + 1) rows.
 */
struct line_bounds
{
	int start_y; /* first (topmost) row belonging to the line */
	int end_y;   /* last (bottommost) row belonging to the line */
};
369+
370+ /**
371+ * Detects horizontal text line boundaries in a bitmap by finding rows of
372+ * fully transparent pixels that separate lines of text.
373+ *
374+ * @param alpha Palette alpha values (indexed by pixel value)
375+ * @param indata Bitmap pixel data (palette indices, w*h bytes)
376+ * @param w Image width
377+ * @param h Image height
378+ * @param lines Output: allocated array of line boundaries (caller must free)
379+ * @param num_lines Output: number of lines found
380+ * @param min_gap Minimum consecutive transparent rows to count as line separator
381+ * @return 0 on success, -1 on failure
382+ */
383+ static int detect_text_lines (png_byte * alpha , unsigned char * indata ,
384+ int w , int h ,
385+ struct line_bounds * * lines , int * num_lines ,
386+ int min_gap )
387+ {
388+ if (!alpha || !indata || !lines || !num_lines || w <= 0 || h <= 0 )
389+ return -1 ;
390+
391+ * lines = NULL ;
392+ * num_lines = 0 ;
393+
394+ // Allocate array to track which rows have visible content
395+ int * row_has_content = (int * )malloc (h * sizeof (int ));
396+ if (!row_has_content )
397+ return -1 ;
398+
399+ // Scan each row to determine if it has any visible (non-transparent) pixels
400+ for (int i = 0 ; i < h ; i ++ )
401+ {
402+ row_has_content [i ] = 0 ;
403+ for (int j = 0 ; j < w ; j ++ )
404+ {
405+ int index = indata [i * w + j ];
406+ if (alpha [index ] != 0 )
407+ {
408+ row_has_content [i ] = 1 ;
409+ break ; // Found visible pixel, no need to check rest of row
410+ }
411+ }
412+ }
413+
414+ // Count lines by finding runs of content rows separated by gaps
415+ int max_lines = (h / 2 ) + 1 ; // Conservative upper bound
416+ struct line_bounds * temp_lines = (struct line_bounds * )malloc (max_lines * sizeof (struct line_bounds ));
417+ if (!temp_lines )
418+ {
419+ free (row_has_content );
420+ return -1 ;
421+ }
422+
423+ int line_count = 0 ;
424+ int in_line = 0 ;
425+ int line_start = 0 ;
426+ int gap_count = 0 ;
427+
428+ for (int i = 0 ; i < h ; i ++ )
429+ {
430+ if (row_has_content [i ])
431+ {
432+ if (!in_line )
433+ {
434+ // Start of a new line
435+ line_start = i ;
436+ in_line = 1 ;
437+ }
438+ gap_count = 0 ;
439+ }
440+ else
441+ {
442+ if (in_line )
443+ {
444+ gap_count ++ ;
445+ if (gap_count >= min_gap )
446+ {
447+ // End of line found (gap is large enough)
448+ if (line_count < max_lines )
449+ {
450+ temp_lines [line_count ].start_y = line_start ;
451+ temp_lines [line_count ].end_y = i - gap_count ;
452+ line_count ++ ;
453+ }
454+ in_line = 0 ;
455+ gap_count = 0 ;
456+ }
457+ }
458+ }
459+ }
460+
461+ // Handle last line if we ended while still in a line
462+ if (in_line && line_count < max_lines )
463+ {
464+ temp_lines [line_count ].start_y = line_start ;
465+ // Find the last row with content
466+ int last_content = h - 1 ;
467+ while (last_content > line_start && !row_has_content [last_content ])
468+ last_content -- ;
469+ temp_lines [line_count ].end_y = last_content ;
470+ line_count ++ ;
471+ }
472+
473+ free (row_has_content );
474+
475+ if (line_count == 0 )
476+ {
477+ free (temp_lines );
478+ return -1 ;
479+ }
480+
481+ // Shrink allocation to actual size
482+ * lines = (struct line_bounds * )realloc (temp_lines , line_count * sizeof (struct line_bounds ));
483+ if (!* lines )
484+ {
485+ * lines = temp_lines ; // Keep original if realloc fails
486+ }
487+ * num_lines = line_count ;
488+
489+ return 0 ;
490+ }
491+
492+ /**
493+ * Performs OCR on a single text line image using PSM 7 (single line mode).
494+ *
495+ * @param ctx OCR context (contains Tesseract API)
496+ * @param line_pix Pre-processed PIX for single line (grayscale, inverted)
497+ * @return Recognized text (caller must free with free()), or NULL on failure
498+ */
499+ static char * ocr_single_line (struct ocrCtx * ctx , PIX * line_pix )
500+ {
501+ if (!ctx || !ctx -> api || !line_pix )
502+ return NULL ;
503+
504+ // Save current PSM
505+ int saved_psm = TessBaseAPIGetPageSegMode (ctx -> api );
506+
507+ // Set PSM 7 for single line recognition
508+ TessBaseAPISetPageSegMode (ctx -> api , 7 ); // PSM_SINGLE_LINE
509+
510+ // Perform OCR
511+ TessBaseAPISetImage2 (ctx -> api , line_pix );
512+ BOOL ret = TessBaseAPIRecognize (ctx -> api , NULL );
513+
514+ char * text = NULL ;
515+ if (!ret )
516+ {
517+ char * tess_text = TessBaseAPIGetUTF8Text (ctx -> api );
518+ if (tess_text )
519+ {
520+ text = strdup (tess_text );
521+ TessDeleteText (tess_text );
522+ }
523+ }
524+
525+ // Restore original PSM
526+ TessBaseAPISetPageSegMode (ctx -> api , saved_psm );
527+
528+ return text ;
529+ }
530+
354531void debug_tesseract (struct ocrCtx * ctx , char * dump_path )
355532{
356533#ifdef OCR_DEBUG
@@ -397,6 +574,8 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
397574 unsigned int * data , * ppixel ;
398575 BOOL tess_ret = FALSE;
399576 struct ocrCtx * ctx = arg ;
577+ char * combined_text = NULL ; // Used by line-split mode
578+ size_t combined_len = 0 ; // Used by line-split mode
400579 pix = pixCreate (w , h , 32 );
401580 color_pix = pixCreate (w , h , 32 );
402581 if (pix == NULL || color_pix == NULL )
@@ -476,6 +655,98 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
476655 return NULL ;
477656 }
478657
658+ // Line splitting mode: detect lines and OCR each separately with PSM 7
659+ if (ccx_options .ocr_line_split && h > 30 )
660+ {
661+ struct line_bounds * lines = NULL ;
662+ int num_lines = 0 ;
663+
664+ // Use min_gap of 3 rows to detect line boundaries
665+ if (detect_text_lines (alpha , indata , w , h , & lines , & num_lines , 3 ) == 0 && num_lines > 1 )
666+ {
667+ // Multiple lines detected - process each separately with PSM 7
668+ // (combined_text and combined_len are declared at function scope)
669+
670+ for (int line_idx = 0 ; line_idx < num_lines ; line_idx ++ )
671+ {
672+ int line_h = lines [line_idx ].end_y - lines [line_idx ].start_y + 1 ;
673+ if (line_h <= 0 )
674+ continue ;
675+
676+ // Extract line region from the grayscale image
677+ BOX * line_box = boxCreate (0 , lines [line_idx ].start_y ,
678+ pixGetWidth (cpix_gs ), line_h );
679+ PIX * line_pix_raw = pixClipRectangle (cpix_gs , line_box , NULL );
680+ boxDestroy (& line_box );
681+
682+ if (line_pix_raw )
683+ {
684+ // Add white padding around the line (helps Tesseract with edge characters)
685+ // The image is inverted (dark text on light bg), so add white (255) border
686+ int padding = 10 ;
687+ PIX * line_pix = pixAddBorderGeneral (line_pix_raw , padding , padding , padding , padding , 255 );
688+ pixDestroy (& line_pix_raw );
689+ if (!line_pix )
690+ continue ;
691+ char * line_text = ocr_single_line (ctx , line_pix );
692+ pixDestroy (& line_pix );
693+
694+ if (line_text )
695+ {
696+ // Trim trailing whitespace from line
697+ size_t line_len = strlen (line_text );
698+ while (line_len > 0 && (line_text [line_len - 1 ] == '\n' ||
699+ line_text [line_len - 1 ] == '\r' ||
700+ line_text [line_len - 1 ] == ' ' ))
701+ {
702+ line_text [-- line_len ] = '\0' ;
703+ }
704+
705+ if (line_len > 0 )
706+ {
707+ // Append to combined result
708+ size_t new_len = combined_len + line_len + 2 ; // +1 for newline, +1 for null
709+ char * new_combined = (char * )realloc (combined_text , new_len );
710+ if (new_combined )
711+ {
712+ combined_text = new_combined ;
713+ if (combined_len > 0 )
714+ {
715+ combined_text [combined_len ++ ] = '\n' ;
716+ }
717+ strcpy (combined_text + combined_len , line_text );
718+ combined_len += line_len ;
719+ }
720+ }
721+ free (line_text );
722+ }
723+ }
724+ }
725+
726+ free (lines );
727+
728+ if (combined_text && combined_len > 0 )
729+ {
730+ // Successfully processed lines - skip whole-image OCR
731+ // but continue to color detection below
732+ goto line_split_color_detection ;
733+ }
734+
735+ // If we got here, line splitting didn't produce results
736+ // Fall through to whole-image OCR
737+ if (combined_text )
738+ free (combined_text );
739+ combined_text = NULL ;
740+ }
741+ else
742+ {
743+ // Line detection failed or only 1 line - fall through to whole-image OCR
744+ if (lines )
745+ free (lines );
746+ }
747+ }
748+
749+ // Standard whole-image OCR path
479750 TessBaseAPISetImage2 (ctx -> api , cpix_gs );
480751 tess_ret = TessBaseAPIRecognize (ctx -> api , NULL );
481752 debug_tesseract (ctx , "./temp/" );
@@ -518,6 +789,14 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
518789 fatal (EXIT_NOT_ENOUGH_MEMORY , "In ocr_bitmap: Out of memory allocating text_out." );
519790 }
520791
792+ // Jump target for line-split mode: use combined_text and continue with color detection
793+ if (0 )
794+ {
795+ line_split_color_detection :
796+ text_out = combined_text ;
797+ combined_text = NULL ; // Transfer ownership
798+ }
799+
521800 // Begin color detection
522801 // Using tlt_config.nofontcolor or ccx_options.nofontcolor (true when "--no-fontcolor" parameter used) to skip color detection if not required
523802 // This is also skipped if --no-spupngocr is set since the OCR output won't be used anyway
0 commit comments