Skip to content

Commit eaf49dd

Browse files
committed
Merge pull request #124 from ws233/master
Renamed `confidencesByIteratorLevel` to `recognizedBlocksByIteratorLevel` and removed `characterBoxes`. Fixes #109.
2 parents 1977e55 + 454da5c commit eaf49dd

File tree

3 files changed

+14
-57
lines changed

3 files changed

+14
-57
lines changed

TesseractOCR/G8Tesseract.h

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -155,14 +155,6 @@
155155
*/
156156
@property (nonatomic, readonly) CGFloat deskewAngle;
157157

158-
/**
159-
* An array of `G8RecognizedBlock`'s representing the characters recognized
160-
* in the target image, including the bounding boxes for each character. See
161-
* G8RecognizedBlock.h for more information about the available fields for
162-
* this data structure.
163-
*/
164-
@property (nonatomic, readonly) NSArray *characterBoxes;
165-
166158
/**
167159
* An array of arrays, where each subarray contains `G8RecognizedBlock`'s
168160
* representing the choices Tesseract considered for each symbol in the target
@@ -175,18 +167,20 @@
175167
@property (nonatomic, readonly) NSArray *characterChoices;
176168

177169
/**
178-
* Retrieve Tesseract's confidences for its recognition result based on a
179-
* provided resolution.
170+
* Retrieve Tesseract's recognition result based on a provided resolution.
171+
* For example, for pageIteratorLevel == G8PageIteratorLevelSymbol it returns
172+
* an array of `G8RecognizedBlock`'s representing the characters recognized
173+
* in the target image, including the bounding boxes for each character.
180174
*
181175
* @param pageIteratorLevel A `G8PageIteratorLevel` representing the resolution
182176
* of interest. See G8Constants.h for the available
183177
* resolution options.
184178
*
185-
* @return An array of `G8RecognizedBlock`'s, each containing a confidence
186-
* value for the text it represents. See G8RecognizedBlock.h for more
179+
* @return An array of `G8RecognizedBlock`'s, each containing a confidence
180+
* value and a bounding box for the text it represents. See G8RecognizedBlock.h for more
187181
* information about the available fields for this data structure.
188182
*/
189-
- (NSArray *)confidencesByIteratorLevel:(G8PageIteratorLevel)pageIteratorLevel;
183+
- (NSArray *)recognizedBlocksByIteratorLevel:(G8PageIteratorLevel)pageIteratorLevel;
190184

191185
#pragma mark - Debug methods
192186

TesseractOCR/G8Tesseract.mm

Lines changed: 4 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -500,44 +500,6 @@ - (void)analyzeLayout
500500
self.layoutAnalysed = YES;
501501
}
502502

503-
- (NSArray *)characterBoxes
504-
{
505-
NSMutableArray *recognizedTextBoxes = [[NSMutableArray alloc] init];
506-
507-
// Get box info
508-
char *boxText = _tesseract->GetBoxText(0);
509-
if (boxText == NULL) {
510-
NSLog(@"No boxes recognized. Check that -[Tesseract setImage:] is passed an image bigger than 0x0.");
511-
return nil;
512-
}
513-
514-
NSString *stringBoxes = [NSString stringWithUTF8String:boxText];
515-
delete[] boxText;
516-
517-
NSArray *arrayOfStringBoxes = [stringBoxes componentsSeparatedByString:@"\n"];
518-
for (NSString *stringBox in arrayOfStringBoxes) {
519-
// A stringBox is of the format "c L B R T p"
520-
// (L, T) is the top left corner of the box, and (R, B) is the bottom right corner
521-
// Tesseract has (0, 0) in the bottom left corner and UIKit has (0, 0) in the top left corner
522-
// Need to flip to work with UIKit
523-
// c is the recognized character and p is the page it is recognized on
524-
NSArray *boxComponents = [stringBox componentsSeparatedByString:@" "];
525-
if (boxComponents.count >= 6) {
526-
CGFloat x = [boxComponents[1] floatValue];
527-
CGFloat y = self.imageSize.height - [boxComponents[4] floatValue];
528-
CGFloat width = [boxComponents[3] floatValue] - [boxComponents[1] floatValue];
529-
CGFloat height = [boxComponents[4] floatValue] - [boxComponents[2] floatValue];
530-
CGRect box = [self normalizedRectForX:x y:y width:width height:height];
531-
532-
G8RecognizedBlock *block = [[G8RecognizedBlock alloc] initWithText:boxComponents[0]
533-
boundingBox:box
534-
confidence:0.0f
535-
level:G8PageIteratorLevelBlock];
536-
[recognizedTextBoxes addObject:block];
537-
}
538-
}
539-
return [recognizedTextBoxes copy];
540-
}
541503

542504
- (CGRect)normalizedRectForX:(CGFloat)x y:(CGFloat)y width:(CGFloat)width height:(CGFloat)height
543505
{
@@ -557,8 +519,9 @@ - (G8RecognizedBlock *)blockFromIterator:(tesseract::ResultIterator *)iterator
557519
const char *word = iterator->GetUTF8Text(level);
558520
if (word != NULL) {
559521
// BoundingBox parameters are (Left Top Right Bottom).
560-
// See comment in characterBoxes() for information on the coordinate
561-
// system, and changes being made.
522+
// (L, T) is the top left corner of the box, and (R, B) is the bottom right corner
523+
// Tesseract has (0, 0) in the bottom left corner and UIKit has (0, 0) in the top left corner
524+
// Need to flip to work with UIKit
562525
int x1, y1, x2, y2;
563526
iterator->BoundingBox(level, &x1, &y1, &x2, &y2);
564527

@@ -614,7 +577,7 @@ - (NSArray *)characterChoices
614577
return [array copy];
615578
}
616579

617-
- (NSArray *)confidencesByIteratorLevel:(G8PageIteratorLevel)pageIteratorLevel
580+
- (NSArray *)recognizedBlocksByIteratorLevel:(G8PageIteratorLevel)pageIteratorLevel
618581
{
619582
tesseract::PageIteratorLevel level = (tesseract::PageIteratorLevel)pageIteratorLevel;
620583

TestsProject/TestsProjectTests/RecognitionTests.m

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@
111111
it(@"Should provide confidences", ^{
112112
[helper recognizeImage];
113113

114-
NSArray *confidences = [helper.tesseract confidencesByIteratorLevel:G8PageIteratorLevelWord];
114+
NSArray *confidences = [helper.tesseract recognizedBlocksByIteratorLevel:G8PageIteratorLevelWord];
115115
[[[confidences should] have:1] object];
116116

117117
id blockObj = confidences.firstObject;
@@ -128,7 +128,7 @@
128128
it(@"Should draw blocks on image", ^{
129129
[helper recognizeImage];
130130

131-
NSArray *blocks = [helper.tesseract confidencesByIteratorLevel:G8PageIteratorLevelSymbol];
131+
NSArray *blocks = [helper.tesseract recognizedBlocksByIteratorLevel:G8PageIteratorLevelSymbol];
132132
UIImage *blocksImage = [helper.tesseract imageWithBlocks:blocks drawText:YES thresholded:NO];
133133
UIImage *expectedBlocksImage = [UIImage imageNamed:@"image_sample_bl"];
134134

@@ -236,7 +236,7 @@
236236
[[recognizedText should] containString:kG8WellScanedFirstTitle];
237237
[[recognizedText shouldNot] containString:kG8WellScanedFinalLongString];
238238

239-
[[[[helper.tesseract confidencesByIteratorLevel:G8PageIteratorLevelWord] should] haveAtLeast:10] items];
239+
[[[[helper.tesseract recognizedBlocksByIteratorLevel:G8PageIteratorLevelWord] should] haveAtLeast:10] items];
240240
});
241241

242242
});

0 commit comments

Comments
 (0)