@@ -77,6 +77,9 @@ TPixelOCR = record
7777
7878const
// Characters treated as "real" text by the line recognizer: ASCII letters, digits and a
// handful of larger symbols. Used to tell genuine text apart from runs of small symbols.
7979 ALPHA_NUM_SYMBOLS = [' a' ..' z' , ' A' ..' Z' , ' 0' ..' 9' , ' %' , ' &' , ' #' , ' $' , ' [' , ' ]' , ' {' , ' }' , ' @' , ' !' , ' ?' ];
80+ var
// True once AlphaNumSymbolTable has been populated (done lazily in TPixelOCR.RecognizeLines).
81+ AlphaNumSymbolTableInitialized: Boolean = False;
// Per-character lookup table mirroring ALPHA_NUM_SYMBOLS; an entry is True when that
// character is a member of the set. Contents are only meaningful after initialization.
// NOTE(review): the lazy initialization is not guarded by any lock - confirm whether
// RecognizeLines can be called from multiple threads.
82+ AlphaNumSymbolTable: array [Char] of Boolean;
8083
8184implementation
8285
@@ -112,13 +115,21 @@ function GetGlyph(const Font: PPixelFont; const c: Char): PPixelFontGlyph; inlin
112115 Result := nil ;
113116end ;
114117
// Reports whether at least half of the characters in `text` are members of
// ALPHA_NUM_SYMBOLS.
//
// Parameters:
//   text - the recognized string to classify.
// Returns:
//   True as soon as the number of matching characters reaches ceil(Length(text) / 2);
//   False otherwise, including for an empty string.
//
// Membership is tested against the constant set directly, so the result does not
// depend on any lookup table having been initialized beforehand.
function IsAlphaNumSym(const text: string): Boolean; inline;
var
  i, count, needed: Integer;
begin
  Result := False;

  // Smallest count that is >= 50% of the length, i.e. ceil(Length / 2).
  needed := (Length(text) + 1) div 2;
  if needed = 0 then
    Exit; // empty input never qualifies

  count := 0;
  for i := 1 to Length(text) do
    if text[i] in ALPHA_NUM_SYMBOLS then
    begin
      Inc(count);
      // Stop early once half of the text is known to be alphanumeric/symbol characters.
      if count >= needed then
        Exit(True);
    end;
end;
@@ -251,6 +262,7 @@ function TPixelOCR._RecognizeX(const Image: TSimbaImage; const Font: PPixelFont;
251262 Result.Bounds.Y1 := $FFFFFF;
252263 Space := 0 ;
253264
265+ BestGlyph := @Font^.Glyphs[0 ];
254266 Lo := @Font^.Glyphs[0 ];
255267 Hi := @Font^.Glyphs[High(Font^.Glyphs)];
256268
@@ -578,42 +590,46 @@ function TPixelOCR.Recognize(Image: TSimbaImage; constref Font: TPixelFont; Boun
578590end ;
579591
580592function TPixelOCR.RecognizeLines (Image: TSimbaImage; constref Font: TPixelFont; Bounds: TBox): TStringArray;
581-
593+ var Temp: TPixelOCR;
// Tries to recognize a line of text starting at (X, Y).
//
// Works in two phases on `Temp`, the scratch copy of Self owned by the enclosing
// RecognizeLines, so Self's own settings are never modified:
//   1. A cheap probe on the single row: at most one glyph, restricted to
//      ALPHA_NUM_SYMBOLS, so rows that only contain small symbols are rejected.
//   2. If the probe hits, a full recognition using the caller's whitelist over the
//      row plus Font.MaxGlyphHeight div 2 extra rows.
//
// The settings are (re)applied on every call because phase 2 overwrites the phase 1
// configuration; configuring Temp only once would leave the probe running with the
// phase 2 settings.
//
// Parameters:
//   X, Y     - start position of the search.
//   isBinary - forwarded unchanged to the recognizers.
//   Match    - receives the last match produced (probe or full recognition).
// Returns:
//   True when the full recognition produced text that IsAlphaNumSym accepts.
function MaybeRecognize(const X, Y: Integer; const isBinary: Boolean; out Match: TPixelOCRMatch): Boolean;
begin
  Result := False;

  // Phase 1: find something on the row that isn't a small character.
  Temp.Whitelist := ALPHA_NUM_SYMBOLS;
  Temp.MaxLen := 1;
  Temp.MaxWalk := 0;

  Match := Temp._RecognizeX(Image, @Font, X, Y, isBinary);
  if (Match.Hits > 0) then
  begin
    // Phase 2: OCR the row and some extra rows with the caller's whitelist.
    Temp.Whitelist := Self.Whitelist;
    Temp.MaxWalk := 0;
    Temp.MaxLen := 0;

    Match := Temp._RecognizeXY(Image, @Font, X, Y, Font.MaxGlyphHeight div 2, isBinary);

    // Ensure that actual text was extracted, not just a mess of short or small symbols.
    Result := IsAlphaNumSym(Match.Text);
  end;
end;
609608
610609var
611610 isBinary: Boolean;
612611 Match: TPixelOCRMatch;
612+ c: Char;
613613begin
614614 if (Length(Font.Glyphs) = 0 ) then
615615 SimbaException(' Font is empty' );
616616
617+ if not AlphaNumSymbolTableInitialized then
618+ begin
619+ FillChar(AlphaNumSymbolTable, SizeOf(AlphaNumSymbolTable), False);
620+ for c in ALPHA_NUM_SYMBOLS do
621+ AlphaNumSymbolTable[c] := True;
622+ AlphaNumSymbolTableInitialized := True;
623+ end ;
624+
625+ Temp := Self;
626+ Temp.Whitelist := ALPHA_NUM_SYMBOLS;
627+ Temp.MaxLen := 1 ;
628+ Temp.MaxWalk := 0 ;
629+ Temp.Whitelist := Self.Whitelist;
630+ Temp.MaxWalk := 0 ;
631+ Temp.MaxLen := 0 ;
632+
617633 Result := [];
618634 Matches := [];
619635
@@ -634,6 +650,7 @@ function TPixelOCR.RecognizeLines(Image: TSimbaImage; constref Font: TPixelFont;
634650 // Now we can confidently jump past this search line, but we don't skip the full height in case of close/overlapping text.
635651 // So we divide the font's max glyph height by 2 and subtract that from the lower end of the found bounds.
636652 Bounds.Y1 := Max(Bounds.Y1, Match.Bounds.Y2 - (Font.MaxGlyphHeight div 2 ));
653+ Continue;
637654 end ;
638655
639656 Bounds.Y1 += 1 ;
0 commit comments