Skip to content

Commit ace7dfb

Browse files
committed
Merge pull request #139 from ws233/master
Added unit tests for setEngineMode and loadVariables, improved others.
2 parents 92ef3d5 + a2dfd11 commit ace7dfb

File tree

3 files changed

+89
-48
lines changed

3 files changed

+89
-48
lines changed

TesseractOCR/G8Tesseract.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,9 @@
299299

300300
/**
301301
* Set Tesseract variables using a dictionary. See G8TesseractParameters.h for
302-
* the available options.
302+
* the available options. Only runtime variables can be set here. To set up
303+
* initialization-time variables, use
304+
* initWithLanguage:configDictionary:configFileNames:cachesRelatedDataPath:engineMode:
303305
*
304306
* @param dictionary The dictionary of key/value pairs to set for Tesseract.
305307
*/

TesseractOCR/G8Tesseract.mm

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,7 @@ + (void)didReceiveMemoryWarningNotification:(NSNotification*)notification {
6868
+ (NSString *)version
6969
{
7070
const char *version = tesseract::TessBaseAPI::Version();
71-
if (version != NULL) {
72-
return [NSString stringWithUTF8String:version];
73-
}
74-
return @"n/a";
71+
return [NSString stringWithUTF8String:version];
7572
}
7673

7774
+ (void)clearCache
@@ -177,7 +174,7 @@ - (BOOL)configEngine
177174
}
178175

179176
int count = (int)self.configFileNames.count;
180-
const char **configs = (const char **)malloc(sizeof(const char *) * count);
177+
const char **configs = count ? (const char **)malloc(sizeof(const char *) * count) : NULL;
181178
for (int i = 0; i < count; i++) {
182179
configs[i] = ((NSString*)self.configFileNames[i]).UTF8String;
183180
}

TestsProject/TestsProjectTests/InitializationTests.m

Lines changed: 84 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ - (BOOL)resetEngine;
3030
NSString *recognitionConfigsFileName = @"recognitionConfigs.txt";
3131
NSString *tessConfigsFolderName = @"tessconfigs";
3232

33+
// init-only config dictionary and the block that verifies its values were applied
34+
NSDictionary *initOnlyConfigDictionary = @{
35+
kG8ParamTessdataManagerDebugLevel : @"1",
36+
kG8ParamLoadSystemDawg : @"F",
37+
kG8ParamLoadFreqDawg : @"F",
38+
kG8ParamUserWordsSuffix : @"user-words",
39+
kG8ParamUserPatternsSuffix : @"user-patterns",
40+
};
3341
void (^checkVariablesAreSetForTesseract)(G8Tesseract *tesseract) = ^(G8Tesseract *tesseract){
3442
// these variables can be set up during initialization
3543
[[[tesseract variableValueForKey:kG8ParamTessdataManagerDebugLevel] should] equal:@"1"];
@@ -49,9 +57,9 @@ - (BOOL)resetEngine;
4957
[[recognizedText should] equal:@"1234567890\n\n"];
5058
};
5159

52-
context(@"Should check common function", ^{
60+
context(@"Should check common functions", ^{
5361

54-
it(@"Should check version", ^{
62+
it(@"Should print version", ^{
5563
[[[G8Tesseract version] should] equal:@"3.03"];
5664
});
5765

@@ -139,13 +147,7 @@ - (BOOL)resetEngine;
139147
it(@"Should initialize with config dictionary", ^{
140148

141149
G8Tesseract *tesseract = [[G8Tesseract alloc] initWithLanguage:kG8Languages
142-
configDictionary:@{
143-
kG8ParamTessdataManagerDebugLevel : @"1",
144-
kG8ParamLoadSystemDawg : @"F",
145-
kG8ParamLoadFreqDawg : @"F",
146-
kG8ParamUserWordsSuffix : @"user-words",
147-
kG8ParamUserPatternsSuffix : @"user-patterns",
148-
}
150+
configDictionary:initOnlyConfigDictionary
149151
configFileNames:nil
150152
cachesRelatedDataPath:nil
151153
engineMode:G8OCREngineModeTesseractOnly];
@@ -245,25 +247,8 @@ - (BOOL)resetEngine;
245247

246248
context(@"no tessdata folder in the Caches yet", ^{
247249

248-
it(@"Should simple init, download rus language files and reinitialize tess with them", ^{
249-
// proof Caches folder is empty
250-
BOOL folderExists = [fileManager fileExistsAtPath:cachesTessDataPath];
251-
NSAssert(folderExists == NO, @"Error! Tessdata folder is already here: %@", cachesTessDataPath);
252-
253-
G8Tesseract *tesseract = [[G8Tesseract alloc] initWithLanguage:kG8Languages
254-
configDictionary:nil
255-
configFileNames:nil
256-
cachesRelatedDataPath:tessdataPath
257-
engineMode:G8OCREngineModeTesseractOnly];
258-
[[tesseract shouldNot] beNil];
259-
260-
[[tesseract.absoluteDataPath should] equal:cachesTessDataPath];
261-
262-
[[theValue(doFoldersContainTheSameElements()) should] beYes];
263-
264-
recognizeSimpleImageWithTesseract(tesseract);
265-
266-
// move rus langiage files to the folder created on previous steps
250+
void (^moveRusLanguageFilesToTheCachesFolder)() = ^{
251+
// move rus language files to the folder created on previous steps
267252
NSString *rusTessdataSourcePath = [[resourcePath stringByAppendingPathComponent:tessdataFolderName] stringByAppendingString:@"-rus"];
268253
NSString *destinationPath = [[cachesPath stringByAppendingPathComponent:tessdataPath] stringByAppendingPathComponent:tessdataFolderName];
269254

@@ -283,6 +268,33 @@ - (BOOL)resetEngine;
283268
}
284269
assert(res == YES);
285270
}
271+
};
272+
273+
G8Tesseract* (^tesseractInitializedWithTessData)() = ^{
274+
// prove Caches folder is empty
275+
BOOL folderExists = [fileManager fileExistsAtPath:cachesTessDataPath];
276+
NSAssert(folderExists == NO, @"Error! Tessdata folder is already here: %@", cachesTessDataPath);
277+
278+
G8Tesseract *tesseract = [[G8Tesseract alloc] initWithLanguage:kG8Languages
279+
configDictionary:nil
280+
configFileNames:nil
281+
cachesRelatedDataPath:tessdataPath
282+
engineMode:G8OCREngineModeTesseractOnly];
283+
[[tesseract shouldNot] beNil];
284+
285+
[[tesseract.absoluteDataPath should] equal:cachesTessDataPath];
286+
287+
NSAssert(doFoldersContainTheSameElements() == YES, @"Error! The tessdata folder in the caches folder contains different elements!");
288+
289+
return tesseract;
290+
};
291+
292+
it(@"Should simple init, download rus language files and reinitialize tess with them", ^{
293+
G8Tesseract *tesseract = tesseractInitializedWithTessData();
294+
295+
recognizeSimpleImageWithTesseract(tesseract);
296+
297+
moveRusLanguageFilesToTheCachesFolder();
286298

287299
// initialize with rus now
288300
G8Tesseract *rusTesseract = [[G8Tesseract alloc] initWithLanguage:@"rus"
@@ -293,18 +305,54 @@ - (BOOL)resetEngine;
293305
[[rusTesseract shouldNot] beNil];
294306

295307
[[rusTesseract.absoluteDataPath should] equal:cachesTessDataPath];
308+
309+
recognizeSimpleImageWithTesseract(tesseract);
310+
});
311+
312+
it(@"Should set variables from dictionary and reinit correctly", ^{
313+
314+
G8Tesseract *tesseract = tesseractInitializedWithTessData();
315+
316+
NSDictionary *dictionaryForRuntime = @{
317+
kG8ParamTessdataManagerDebugLevel : @"1",
318+
kG8ParamUserWordsSuffix : @"user-words",
319+
};
320+
NSString *whitelistString = @"1234567890";
321+
NSString *blacklistString = @"aAbBcC";
322+
void (^checkVariablesSetOnRuntime)(void) = ^{
323+
[[[tesseract variableValueForKey:kG8ParamTessdataManagerDebugLevel] should] equal:@"1"];
324+
[[[tesseract variableValueForKey:kG8ParamUserWordsSuffix] shouldNot] equal:@"user-words"]; // initial only, should not be set
325+
[[[tesseract variableValueForKey:kG8ParamTesseditCharWhitelist] should] equal:whitelistString];
326+
[[[tesseract variableValueForKey:kG8ParamTesseditCharBlacklist] should] equal:blacklistString];
327+
328+
[[tesseract.charWhitelist should] equal:whitelistString];
329+
[[tesseract.charBlacklist should] equal:blacklistString];
330+
};
331+
332+
tesseract.charWhitelist = whitelistString;
333+
tesseract.charBlacklist = blacklistString;
334+
[tesseract setVariablesFromDictionary:dictionaryForRuntime];
335+
checkVariablesSetOnRuntime();
336+
337+
moveRusLanguageFilesToTheCachesFolder();
338+
// reinit tesseract with a different language to check that all the variables are restored (still set) after reinitialization
339+
tesseract.language = @"rus";
340+
341+
checkVariablesSetOnRuntime();
342+
343+
recognizeSimpleImageWithTesseract(tesseract);
344+
345+
tesseract.engineMode = G8OCREngineModeCubeOnly;
346+
checkVariablesSetOnRuntime();
347+
348+
// uncomment this to see the error in cube mode with rus locale
349+
//recognizeSimpleImageWithTesseract(tesseract);
296350
});
297351

298352
it(@"Should initialize with config dictionary", ^{
299353

300354
G8Tesseract *tesseract = [[G8Tesseract alloc] initWithLanguage:kG8Languages
301-
configDictionary:@{
302-
kG8ParamTessdataManagerDebugLevel : @"1",
303-
kG8ParamLoadSystemDawg : @"F",
304-
kG8ParamLoadFreqDawg : @"F",
305-
kG8ParamUserWordsSuffix : @"user-words",
306-
kG8ParamUserPatternsSuffix : @"user-patterns",
307-
}
355+
configDictionary:initOnlyConfigDictionary
308356
configFileNames:nil
309357
cachesRelatedDataPath:tessdataPath
310358
engineMode:G8OCREngineModeTesseractOnly];
@@ -447,13 +495,7 @@ - (BOOL)resetEngine;
447495
it(@"Should initialize with config dictionary", ^{
448496

449497
G8Tesseract *tesseract = [[G8Tesseract alloc] initWithLanguage:kG8Languages
450-
configDictionary:@{
451-
kG8ParamTessdataManagerDebugLevel : @"1",
452-
kG8ParamLoadSystemDawg : @"F",
453-
kG8ParamLoadFreqDawg : @"F",
454-
kG8ParamUserWordsSuffix : @"user-words",
455-
kG8ParamUserPatternsSuffix : @"user-patterns",
456-
}
498+
configDictionary:initOnlyConfigDictionary
457499
configFileNames:nil
458500
cachesRelatedDataPath:tessdataPath
459501
engineMode:G8OCREngineModeTesseractOnly];

0 commit comments

Comments
 (0)