Skip to content

Commit 88b492f

Browse files
committed
Better fix infinite recursion + Better line detection.
With this change I am assuming that the text we want to extract actually contains some real characters (letters, digits, or meaningful symbols), and not just a row of '_', '.', ',' and '-'.
1 parent d081180 commit 88b492f

File tree

2 files changed

+29
-7
lines changed

2 files changed

+29
-7
lines changed

simpleocr.engine.pas

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ interface
2727
FONTSET_START = #32;
2828
FONTSET_END = #126;
2929

30+
const
31+
ALPHA_NUMERIC_SYM = ['a'..'z', 'A'..'Z', '0'..'9','%','&','#','$','[',']','{','}','@','!','?'];
32+
33+
3034
type
3135
PFontCharacter = ^TFontCharacter;
3236
TFontCharacter = packed record
@@ -95,6 +99,17 @@ implementation
9599
uses
96100
graphtype, intfgraphics, graphics, math;
97101

102+
103+
// Reports whether `text` holds at least one character that is a member of
// the ALPHA_NUMERIC_SYM set. An empty string yields False.
function ContainsAlphaNumSym(text: string): Boolean; inline;
var
  Idx: Int32;
begin
  Idx := 1;
  // Walk the string left to right and stop at the first matching character.
  while Idx <= Length(text) do
  begin
    if text[Idx] in ALPHA_NUMERIC_SYM then
      Exit(True);
    Inc(Idx);
  end;

  // Reached only when no character belonged to the set.
  Result := False;
end;
111+
112+
98113
function TFontSet.GetCharacterPoints(const Character: Char): Integer;
99114
begin
100115
if (Character in [FONTSET_START..FONTSET_END]) then
@@ -678,15 +693,21 @@ function TSimpleOCR.RecognizeLines(Matrix: TIntegerMatrix; Filter: TOCRFilter; c
678693
begin
679694
// OCR the row and some extra columns
680695
Text := Self._RecognizeXY(Box(SearchBox.X1, SearchBox.Y1, SearchBox.X2, SearchBox.Y1 + (FFontSet.MaxHeight div 2)), FontSet.CharacterPoints[Filter.MinCharacterMatch], $FFFFFF, Hits, Bounds);
681-
if (Text = '') or (Bounds.Y1 = LastBounds.Y1) then
696+
697+
if (Text = '') then
682698
Exit;
683699

684-
LastBounds := Bounds;
685-
Result := Result + [Text];
686-
TextBounds := TextBounds + [Bounds];
700+
// Ensure that actual text was extracted, not just a jumble of small punctuation-like symbols.
701+
if ContainsAlphaNumSym(Text) then
702+
begin
703+
LastBounds := Bounds;
704+
Result := Result + [Text];
705+
TextBounds := TextBounds + [Bounds];
687706

688-
// Move down to the found text Bounds.Y2 (minus a little) so we don't recognize this again
689-
SearchBox.Y1 := Bounds.Y2 - (FFontSet.MaxHeight div 4);
707+
// Now we can confidently jump past this search line, but we don't skip it fully in case of overlapping text.
708+
// So we divide the font set's max glyph height by 4, and subtract that from the lower end of the found bounds.
709+
SearchBox.Y1 := Max(SearchBox.Y1, Bounds.Y2 - (FFontSet.MaxHeight div 4));
710+
end;
690711
end;
691712

692713
SearchBox.Y1 += 1;

tester/tester.lpr

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ procedure Test_MultiLine3;
129129
Lines := SimpleOCR.RecognizeLines(LoadMatrix('images/multiline3.png'), Filter1, FONT_PLAIN_11);
130130
for I := 0 to High(Lines) do
131131
Lines[I] := StringReplace(Lines[I], 'I', 'l', [rfReplaceAll]);
132-
132+
WriteLn(Lines[1]);
133133
Assert(Length(Lines) = 5);
134134
Assert(Lines[0] = 'Leather Boots:');
135135
Assert(Lines[1] = 'Adamant Kiteshield:');
@@ -292,6 +292,7 @@ procedure Test_Threshold1;
292292
MinCharacterMatch: #0;
293293
);
294294
begin
295+
WriteLn(SimpleOCR.Recognize(LoadMatrix('images/thresh.png'), Filter, FONT_BOLD_12));
295296
Assert(SimpleOCR.Recognize(LoadMatrix('images/thresh.png'), Filter, FONT_BOLD_12) = 'Showing items: hello');
296297
end;
297298

0 commit comments

Comments
 (0)