Skip to content

Commit 17f18ee

Browse files
committed
fix: pixelocr.recognize lines
Instead of checking whether just one character is alphanumeric, it now checks whether at least 50% of the string is.
1 parent c3abf60 commit 17f18ee

File tree

1 file changed

+37
-20
lines changed

1 file changed

+37
-20
lines changed

Source/simba.pixelocr.pas

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ TPixelOCR = record
7777

7878
const
7979
ALPHA_NUM_SYMBOLS = ['a'..'z', 'A'..'Z', '0'..'9', '%', '&', '#', '$', '[', ']', '{', '}', '@', '!', '?'];
80+
var
81+
AlphaNumSymbolTableInitialized: Boolean = False;
82+
AlphaNumSymbolTable: array [Char] of Boolean;
8083

8184
implementation
8285

@@ -112,13 +115,21 @@ function GetGlyph(const Font: PPixelFont; const c: Char): PPixelFontGlyph; inlin
112115
Result := nil;
113116
end;
114117

115-
function ContainsAlphaNumSym(const Text: string): Boolean; inline;
118+
function IsAlphaNumSym(const text: string): Boolean; inline;
116119
var
117-
I: Integer;
120+
i, count, needed: Integer;
118121
begin
119-
for I := 1 to Length(Text) do
120-
if Text[I] in ALPHA_NUM_SYMBOLS then
121-
Exit(True);
122+
count := 0;
123+
needed := (Length(text) * 50 + 99) div 100;
124+
125+
for i := 1 to Length(text) do
126+
if AlphaNumSymbolTable[text[i]] then
127+
begin
128+
Inc(count);
129+
// if at least 50% of the text is in ALPHA_NUM_SYMBOLS we are good.
130+
if count >= needed then
131+
Exit(True);
132+
end;
122133

123134
Result := False;
124135
end;
@@ -251,6 +262,7 @@ function TPixelOCR._RecognizeX(const Image: TSimbaImage; const Font: PPixelFont;
251262
Result.Bounds.Y1 := $FFFFFF;
252263
Space := 0;
253264

265+
BestGlyph := @Font^.Glyphs[0];
254266
Lo := @Font^.Glyphs[0];
255267
Hi := @Font^.Glyphs[High(Font^.Glyphs)];
256268

@@ -578,42 +590,46 @@ function TPixelOCR.Recognize(Image: TSimbaImage; constref Font: TPixelFont; Boun
578590
end;
579591

580592
function TPixelOCR.RecognizeLines(Image: TSimbaImage; constref Font: TPixelFont; Bounds: TBox): TStringArray;
581-
593+
var Temp: TPixelOCR;
582594
function MaybeRecognize(const X, Y: Integer; const isBinary: Boolean; out Match: TPixelOCRMatch): Boolean;
583-
var
584-
Temp: TPixelOCR;
585595
begin
586596
Result := False;
587-
588-
// use a copy here since we change these properties
589-
Temp := Self;
590-
Temp.Whitelist := ALPHA_NUM_SYMBOLS;
591-
Temp.MaxLen := 1;
592-
Temp.MaxWalk := 0;
593-
594597
// Find something on a row that isn't a small character
595598
Match := Temp._RecognizeX(Image, @Font, X, Y, isBinary);
596599
if (Match.Hits > 0) then
597600
begin
598601
// OCR the row and some extra rows
599-
Temp.Whitelist := Self.Whitelist;
600-
Temp.MaxWalk := 0;
601-
Temp.MaxLen := 0;
602-
603602
Match := Temp._RecognizeXY(Image, @Font, X, Y, Font.MaxGlyphHeight div 2, isBinary);
604603
// Ensure that actual text was extracted, not just a mess of short or small symbols.
605-
if ContainsAlphaNumSym(Match.Text) then
604+
if IsAlphaNumSym(Match.Text) then
606605
Result := True;
607606
end;
608607
end;
609608

610609
var
611610
isBinary: Boolean;
612611
Match: TPixelOCRMatch;
612+
c: Char;
613613
begin
614614
if (Length(Font.Glyphs) = 0) then
615615
SimbaException('Font is empty');
616616

617+
if not AlphaNumSymbolTableInitialized then
618+
begin
619+
FillChar(AlphaNumSymbolTable, SizeOf(AlphaNumSymbolTable), False);
620+
for c in ALPHA_NUM_SYMBOLS do
621+
AlphaNumSymbolTable[c] := True;
622+
AlphaNumSymbolTableInitialized := True;
623+
end;
624+
625+
Temp := Self;
626+
Temp.Whitelist := ALPHA_NUM_SYMBOLS;
627+
Temp.MaxLen := 1;
628+
Temp.MaxWalk := 0;
629+
Temp.Whitelist := Self.Whitelist;
630+
Temp.MaxWalk := 0;
631+
Temp.MaxLen := 0;
632+
617633
Result := [];
618634
Matches := [];
619635

@@ -634,6 +650,7 @@ function TPixelOCR.RecognizeLines(Image: TSimbaImage; constref Font: TPixelFont;
634650
// Now we can confidently skip this search line by a jump, but we don't skip fully in case of close/overlapping text.
635651
// So we divide the font's max glyph height by 2 and subtract that from the lower end of the found bounds.
636652
Bounds.Y1 := Max(Bounds.Y1, Match.Bounds.Y2 - (Font.MaxGlyphHeight div 2));
653+
Continue;
637654
end;
638655

639656
Bounds.Y1 += 1;

0 commit comments

Comments
 (0)