@@ -27,6 +27,10 @@ interface
2727 FONTSET_START = #32 ;
2828 FONTSET_END = #126 ;
2929
// Set of characters treated as "real" text: ASCII letters, digits and a handful of symbols.
// ContainsAlphaNumSym tests the characters of a string against this set.
30+ const
31+ ALPHA_NUMERIC_SYM = [' a' ..' z' , ' A' ..' Z' , ' 0' ..' 9' ,' %' ,' &' ,' #' ,' $' ,' [' ,' ]' ,' {' ,' }' ,' @' ,' !' ,' ?' ];
32+
33+
3034type
3135 PFontCharacter = ^TFontCharacter;
3236 TFontCharacter = packed record
@@ -95,6 +99,17 @@ implementation
9599uses
96100 graphtype, intfgraphics, graphics, math;
97101
102+
// Reports whether `text` holds at least one character that is a member of
// ALPHA_NUMERIC_SYM. Returns False for an empty string or when no character
// of `text` is in the set.
function ContainsAlphaNumSym(text: string): Boolean; inline;
var
  idx: Int32;
begin
  // Strings are indexed from 1, so scan positions 1..Length(text).
  for idx := 1 to Length(text) do
    if text[idx] in ALPHA_NUMERIC_SYM then
    begin
      // A single qualifying character is enough; stop scanning.
      Result := True;
      Exit;
    end;

  // Only reached when the loop found nothing (including empty input).
  Result := False;
end;
111+
112+
98113function TFontSet.GetCharacterPoints (const Character: Char): Integer;
99114begin
100115 if (Character in [FONTSET_START..FONTSET_END]) then
@@ -678,15 +693,21 @@ function TSimpleOCR.RecognizeLines(Matrix: TIntegerMatrix; Filter: TOCRFilter; c
678693 begin
679694 // OCR the row and some extra columns
680695 Text := Self._RecognizeXY(Box(SearchBox.X1, SearchBox.Y1, SearchBox.X2, SearchBox.Y1 + (FFontSet.MaxHeight div 2 )), FontSet.CharacterPoints[Filter.MinCharacterMatch], $FFFFFF, Hits, Bounds);
681- if (Text = ' ' ) or (Bounds.Y1 = LastBounds.Y1) then
696+
697+ if (Text = ' ' ) then
682698 Exit;
683699
684- LastBounds := Bounds;
685- Result := Result + [Text];
686- TextBounds := TextBounds + [Bounds];
700+ // Ensure that actual text was extracted, not just a jumble of short or small symbol characters.
701+ if ContainsAlphaNumSym(Text) then
702+ begin
703+ LastBounds := Bounds;
704+ Result := Result + [Text];
705+ TextBounds := TextBounds + [Bounds];
687706
688- // Move down to the found text Bounds.Y2 (minus a little) so we don't recognize this again
689- SearchBox.Y1 := Bounds.Y2 - (FFontSet.MaxHeight div 4 );
707+ // Now we can confidently jump past this search line, but we don't skip it fully in case of overlapping text.
708+ // So we divide the font set's max glyph height by 4 and subtract that from the lower end of the found bounds.
709+ SearchBox.Y1 := Max(SearchBox.Y1, Bounds.Y2 - (FFontSet.MaxHeight div 4 ));
710+ end ;
690711 end ;
691712
692713 SearchBox.Y1 += 1 ;
0 commit comments