Skip to content

Commit 86c1cf3

Browse files
author
Kirill Makankov
committed
1. recognitionConfis.txt has been renamed to recognitionConfigs.txt
2. tessdata-rus has been added as a resource; it is used to check the #53 scenario. 3. Some recognition tests have been added after tess initialization, which prove correct initialization from the Caches folder. 4. tessedit_char_whitelist 0123456789 has been added to the config file for the recognition tests.
1 parent f187787 commit 86c1cf3

File tree

12 files changed

+507726
-23
lines changed

12 files changed

+507726
-23
lines changed

Template Framework Project/Template Framework Project.xcodeproj/project.pbxproj

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
64DDA74E188FD7D10025590D /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6400DDEA180C5DE000443362 /* Main.storyboard */; };
2222
64DDA75B188FD9140025590D /* tessdata in Resources */ = {isa = PBXBuildFile; fileRef = 64DDA75A188FD9140025590D /* tessdata */; };
2323
64E40AB1180C6D4D00C36DDE /* libstdc++.6.0.9.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 64E40AB0180C6D4D00C36DDE /* libstdc++.6.0.9.dylib */; };
24+
736EFF261A5877200031B432 /* tessdata-rus in Resources */ = {isa = PBXBuildFile; fileRef = 736EFF251A5877200031B432 /* tessdata-rus */; };
2425
/* End PBXBuildFile section */
2526

2627
/* Begin PBXFileReference section */
@@ -43,6 +44,7 @@
4344
6490748F198A5CD500D728CC /* CoreImage.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreImage.framework; path = System/Library/Frameworks/CoreImage.framework; sourceTree = SDKROOT; };
4445
64DDA75A188FD9140025590D /* tessdata */ = {isa = PBXFileReference; lastKnownFileType = folder; path = tessdata; sourceTree = "<group>"; };
4546
64E40AB0180C6D4D00C36DDE /* libstdc++.6.0.9.dylib */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = "libstdc++.6.0.9.dylib"; path = "usr/lib/libstdc++.6.0.9.dylib"; sourceTree = SDKROOT; };
47+
736EFF251A5877200031B432 /* tessdata-rus */ = {isa = PBXFileReference; lastKnownFileType = folder; name = "tessdata-rus"; path = "../TestsProject/TestsProjectTests/tessdata-rus"; sourceTree = "<group>"; };
4648
/* End PBXFileReference section */
4749

4850
/* Begin PBXFrameworksBuildPhase section */
@@ -65,6 +67,7 @@
6567
6400DDCC180C5DE000443362 = {
6668
isa = PBXGroup;
6769
children = (
70+
736EFF251A5877200031B432 /* tessdata-rus */,
6871
6400DDDE180C5DE000443362 /* Template Framework Project */,
6972
6400DDD7180C5DE000443362 /* Frameworks */,
7073
6400DDD6180C5DE000443362 /* Products */,
@@ -175,6 +178,7 @@
175178
6400DDF1180C5DE000443362 /* Images.xcassets in Resources */,
176179
428615B71845F340005D5A2E /* image_sample.jpg in Resources */,
177180
64DDA75B188FD9140025590D /* tessdata in Resources */,
181+
736EFF261A5877200031B432 /* tessdata-rus in Resources */,
178182
6400DDE3180C5DE000443362 /* InfoPlist.strings in Resources */,
179183
64DDA74E188FD7D10025590D /* Main.storyboard in Resources */,
180184
);

TestsProject/TestsProject.xcodeproj/project.pbxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
732C54751A514DA5000322DA /* InitializationTests.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = InitializationTests.m; sourceTree = "<group>"; };
7676
732C54771A5288CC000322DA /* Defaults.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Defaults.h; sourceTree = "<group>"; };
7777
732C54781A5288CC000322DA /* Defaults.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = Defaults.m; sourceTree = "<group>"; };
78+
736EFF231A5872CA0031B432 /* tessdata-rus */ = {isa = PBXFileReference; lastKnownFileType = folder; name = "tessdata-rus"; path = "TestsProjectTests/tessdata-rus"; sourceTree = SOURCE_ROOT; };
7879
/* End PBXFileReference section */
7980

8081
/* Begin PBXFrameworksBuildPhase section */
@@ -124,6 +125,7 @@
124125
4115B95D1A3EF8E90004EC0A /* TestsProject */ = {
125126
isa = PBXGroup;
126127
children = (
128+
736EFF231A5872CA0031B432 /* tessdata-rus */,
127129
41184B551A3EFCAD007F5923 /* tessdata */,
128130
4115B9621A3EF8E90004EC0A /* AppDelegate.h */,
129131
4115B9631A3EF8E90004EC0A /* AppDelegate.m */,

TestsProject/TestsProjectTests/InitializationTests.m

Lines changed: 65 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,28 @@
2121
NSString *tessdataFolderName = @"tessdata";
2222
NSString *tessdataFolderPathFromTheBundle = [[resourcePath stringByAppendingPathComponent:tessdataFolderName] stringByAppendingString:@"/"];
2323
NSString *debugConfigsFileName = @"debugConfigs.txt";
24-
NSString *recognitionConfigsFileName = @"recognitionConfis.txt";
24+
NSString *recognitionConfigsFileName = @"recognitionConfigs.txt";
2525
NSString *tessConfigsFolderName = @"tessconfigs";
2626

2727
void (^checkVariablesAreSetForTesseract)(G8Tesseract *tesseract) = ^(G8Tesseract *tesseract){
28+
// these variable could be set up during the initialization
2829
[[[tesseract variableValueForKey:kG8ParamTessdataManagerDebugLevel] should] equal:@"1"];
2930
[[[tesseract variableValueForKey:kG8ParamLoadSystemDawg] should] equal:@"0"];
3031
[[[tesseract variableValueForKey:kG8ParamLoadFreqDawg] should] equal:@"0"];
3132
[[[tesseract variableValueForKey:kG8ParamUserWordsSuffix] should] equal:@"user-words"];
3233
[[[tesseract variableValueForKey:kG8ParamUserPatternsSuffix] should] equal:@"user-patterns"];
3334
};
3435

36+
void (^recognizeSimpleImageWithTesseract)(G8Tesseract *tesseract) = ^(G8Tesseract *tesseract) {
37+
tesseract.image = [UIImage imageNamed:@"image_sample.jpg"];
38+
[tesseract recognize];
39+
40+
[[theValue(tesseract.progress) should] equal:theValue(100)];
41+
42+
NSString *recognizedText = tesseract.recognizedText;
43+
[[recognizedText should] equal:@"1234567890\n\n"];
44+
};
45+
3546
context(@"nil cachesRelatedDataPath", ^{
3647

3748
it(@"Should initialize simple", ^{
@@ -134,7 +145,7 @@
134145

135146
context(@"no tessdata folder in the Caches yet", ^{
136147

137-
it(@"Should simple init", ^{
148+
it(@"Should simple init, download rus language files and reinitialize tess with them", ^{
138149
// proof Caches folder is empty
139150
BOOL folderExists = [fileManager fileExistsAtPath:cachesTessDataPath];
140151
[[theValue(folderExists) should] beNo];
@@ -149,6 +160,39 @@
149160
[[tesseract.absoluteDataPath should] equal:cachesTessDataPath];
150161

151162
[[theValue(doFoldersContainTheSameElements()) should] beYes];
163+
164+
recognizeSimpleImageWithTesseract(tesseract);
165+
166+
// move rus langiage files to the folder created on previous steps
167+
NSString *rusTessdataSourcePath = [[resourcePath stringByAppendingPathComponent:tessdataFolderName] stringByAppendingString:@"-rus"];
168+
NSString *destinationPath = [[cachesPath stringByAppendingPathComponent:tessdataPath] stringByAppendingPathComponent:tessdataFolderName];
169+
170+
NSFileManager *fileManager = [NSFileManager defaultManager];
171+
NSError *error = nil;
172+
NSArray *contents = [fileManager contentsOfDirectoryAtPath:rusTessdataSourcePath error:&error];
173+
for (NSString *filaName in contents) {
174+
NSString *rusFileName = [rusTessdataSourcePath stringByAppendingPathComponent:filaName];
175+
NSString *destinationFileName = [destinationPath stringByAppendingPathComponent:filaName];
176+
if ([fileManager fileExistsAtPath:destinationFileName isDirectory:nil]) {
177+
[fileManager removeItemAtPath:destinationFileName error:nil];
178+
}
179+
180+
BOOL res = [fileManager copyItemAtPath:rusFileName toPath:destinationFileName error:&error];
181+
if (error) {
182+
NSLog(@"Error copying rus traineddata: %@", error);
183+
}
184+
assert(res == YES);
185+
}
186+
187+
// initialize with rus now
188+
G8Tesseract *rusResseract = [[G8Tesseract alloc] initWithLanguage:@"rus"
189+
configDictionary:nil
190+
configFileNames:nil
191+
cachesRelatedDataPath:tessdataPath
192+
engineMode:G8OCREngineModeTesseractOnly];
193+
[[rusResseract shouldNot] beNil];
194+
195+
[[rusResseract.absoluteDataPath should] equal:cachesTessDataPath];
152196
});
153197

154198
it(@"Should initialize with config dictionary", ^{
@@ -168,6 +212,8 @@
168212
[[tesseract.absoluteDataPath should] equal:cachesTessDataPath];
169213

170214
checkVariablesAreSetForTesseract(tesseract);
215+
216+
recognizeSimpleImageWithTesseract(tesseract);
171217
});
172218

173219
it(@"Should initialize config dictionary and a file", ^{
@@ -183,25 +229,25 @@
183229
[[tesseract.absoluteDataPath should] equal:cachesTessDataPath];
184230

185231
checkVariablesAreSetForTesseract(tesseract);
232+
233+
recognizeSimpleImageWithTesseract(tesseract);
186234
});
187-
});
188-
189-
it(@"Should initialize with config file path, while there is no tessdata folder yet", ^{
190235

191-
NSLog(@"debugConfigsFilePathFromTheCaches: %@", debugConfigsFilePathFromTheCaches);
192-
NSLog(@"recognitionConfigsFilePathFromTheCaches: %@", recognitionConfigsFilePathFromTheCaches);
193-
[[[fileManager attributesOfItemAtPath:debugConfigsFilePathFromTheCaches error:nil] should] beNil];
194-
[[[fileManager attributesOfItemAtPath:recognitionConfigsFilePathFromTheCaches error:nil] should] beNil];
195-
196-
G8Tesseract *tesseract = [[G8Tesseract alloc] initWithLanguage:kG8Languages
197-
configDictionary:nil
198-
configFileNames:@[debugConfigsFilePathFromTheCaches, recognitionConfigsFilePathFromTheCaches]
199-
cachesRelatedDataPath:tessdataPath
200-
engineMode:G8OCREngineModeTesseractOnly];
201-
[[tesseract shouldNot] beNil];
202-
[[tesseract.absoluteDataPath should] equal:cachesTessDataPath];
203-
204-
checkVariablesAreSetForTesseract(tesseract);
236+
it(@"Should initialize with 2 config files", ^{
237+
238+
[[[fileManager attributesOfItemAtPath:debugConfigsFilePathFromTheCaches error:nil] should] beNil];
239+
[[[fileManager attributesOfItemAtPath:recognitionConfigsFilePathFromTheCaches error:nil] should] beNil];
240+
241+
G8Tesseract *tesseract = [[G8Tesseract alloc] initWithLanguage:kG8Languages
242+
configDictionary:nil
243+
configFileNames:@[debugConfigsFilePathFromTheCaches, recognitionConfigsFilePathFromTheCaches]
244+
cachesRelatedDataPath:tessdataPath
245+
engineMode:G8OCREngineModeTesseractOnly];
246+
[[tesseract shouldNot] beNil];
247+
[[tesseract.absoluteDataPath should] equal:cachesTessDataPath];
248+
249+
checkVariablesAreSetForTesseract(tesseract);
250+
});
205251
});
206252

207253
context(@"tessdata are already in the Caches", ^{
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
1|І
3+
×Хх
4+
Дд
5+
Ии
6+
Йй
7+
Лл
8+
Пп
9+
Мм
10+
Тт
11+
Цц
12+
Чч
13+
Шш
14+
Щщ
15+
вВ
16+
сС
17+
нН
18+
ыЫ
19+
юЮ
20+
"“”
21+
ЪъЬь
22+
3ЭэЗз
23+
ҐГ
24+
6бБ
25+
Кк
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
LeadPunc="({[`'«‹
2+
TrailPunc=}:;-]!?`,.)"'»›
3+
NumLeadPunc=#({[@$€
4+
NumTrailPunc=}):;].,%°
5+
Operators=*+-/.:,()[]±
6+
Digits=0123456789
7+
Alphas=ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё
891 KB
Binary file not shown.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
RecoWgt=0.7255
2+
SizeWgt=0.2327
3+
CharBigramsWgt=0.0000
4+
WordUnigramsWgt=0.0035
5+
MaxSegPerChar=8
6+
BeamWidth=10
7+
ConvGridSize=48
8+
HistWindWid=2
9+
MinConCompSize=0
10+
MaxWordAspectRatio=20.0000
11+
MinSpaceHeightRatio=0.5000
12+
MaxSpaceHeightRatio=0.6000
13+
CombinerRunThresh=1.0000
14+
CombinerClassifierThresh=0.5000
15+
OODWgt=0.0354
16+
NumWgt=0.0028

0 commit comments

Comments
 (0)