Skip to content

Commit 2822995

Browse files
committed
change how OCR checks if a string has proper text
1 parent 72af183 commit 2822995

File tree

1 file changed

+27
-8
lines changed

1 file changed

+27
-8
lines changed

Source/simba.pixelocr.pas

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ TPixelOCR = record
7575
function RecognizeLines(Image: TSimbaImage; constref Font: TPixelFont; Bounds: TBox): TStringArray;
7676
end;
7777

78-
const
79-
ALPHA_NUM_SYMBOLS = ['a'..'z', 'A'..'Z', '0'..'9', '%', '&', '#', '$', '[', ']', '{', '}', '@', '!', '?'];
78+
var
79+
ALPHA_NUM_SYMBOLS: array [Char] of Boolean;
8080

8181
implementation
8282

@@ -86,6 +86,17 @@ implementation
8686
simba.vartype_pointarray,
8787
simba.fs;
8888

89+
initialization
90+
const
91+
CHARACTERS = ['a'..'z', 'A'..'Z', '0'..'9', '%', '&', '#', '$', '[', ']', '{', '}', '@', '!', '?'];
92+
var
93+
c: Char;
94+
begin
95+
// NOTE(review): this initialization section declares its own const/var and is
// followed by further function definitions; Free Pascal expects initialization
// to be the final section of the unit and to contain statements only - confirm
// this compiles, or move the table setup to the end of the unit.
// Clear the lookup table before marking the accepted characters. The table is
// ALPHA_NUM_SYMBOLS (declared in the interface); AlphaNumSymTable does not exist.
FillChar(ALPHA_NUM_SYMBOLS, SizeOf(ALPHA_NUM_SYMBOLS), False);
96+
for c in CHARACTERS do
97+
ALPHA_NUM_SYMBOLS[c] := True;
98+
end;
99+
89100
function IsSimilar(const Image: TSimbaImage; const X, Y: Integer; const Color2: TColorBGRA; const Tol: Single): Boolean; inline;
90101
const
91102
MAX_DISTANCE_RGB = Single(441.672955930064); // Sqrt(Sqr(255) + Sqr(255) + Sqr(255))
@@ -112,13 +123,21 @@ function GetGlyph(const Font: PPixelFont; const c: Char): PPixelFontGlyph; inlin
112123
Result := nil;
113124
end;
114125

115-
function ContainsAlphaNumSym(const Text: string): Boolean; inline;
126+
function IsAlphaNumSym(const text: string): Boolean; inline;
116127
var
117-
I: Integer;
128+
i, count, needed: Integer;
118129
begin
119-
for I := 1 to Length(Text) do
120-
if Text[I] in ALPHA_NUM_SYMBOLS then
121-
Exit(True);
130+
// Number of matching characters required: at least 50% of the text, rounded up.
131+
// Local variables are not zero-initialised in Pascal; count must start at 0
// or the threshold comparison below operates on an arbitrary value.
count := 0;
// (Length * 50 + 99) div 100 is an integer ceil(Length / 2).
needed := (Length(text) * 50 + 99) div 100;
132+
133+
for i := 1 to Length(text) do
134+
if ALPHA_NUM_SYMBOLS[text[i]] then
135+
begin
136+
Inc(count);
137+
//if 50% of the text is ALPHA_NUM_SYMBOLS we are good.
138+
if count >= needed then
139+
Exit(True);
140+
end;
122141

123142
Result := False;
124143
end;
@@ -589,7 +608,7 @@ function TPixelOCR.RecognizeLines(Image: TSimbaImage; constref Font: TPixelFont;
589608
// OCR the row and some extra rows
590609
Match := Temp._RecognizeXY(Image, @Font, X, Y, Font.MaxGlyphHeight div 2, isBinary);
591610
// Ensure that actual Text was extracted, not just a symbol mess of short or small character symbols.
592-
if ContainsAlphaNumSym(Match.Text) then
611+
if IsAlphaNumSym(Match.Text) then
593612
Result := True;
594613
end;
595614
end;

0 commit comments

Comments
 (0)