Skip to content

Commit 87614be

Browse files
author
Kevin Conley
committed
Merge pull request #158 from PatrickNLT/master
Added the ability to load tessdata language files from any directory.
2 parents 133c8cb + aa89bc4 commit 87614be

File tree

4 files changed

+192
-20
lines changed

4 files changed

+192
-20
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
language: objective-c
22
xcode_workspace: Tesseract-OCR-iOS.xcworkspace
33
xcode_sdk:
4-
- iphonesimulator7.0
4+
- iphonesimulator7.1
55
- iphonesimulator8.1
66
xcode_scheme:
77
- Template Framework Project

TesseractOCR/G8Tesseract.h

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,49 @@ extern NSInteger const kG8MaxCredibleResolution;
297297
configDictionary:(NSDictionary *)configDictionary
298298
configFileNames:(NSArray *)configFileNames
299299
cachesRelatedDataPath:(NSString *)cachesRelatedDataPath
300-
engineMode:(G8OCREngineMode)engineMode NS_DESIGNATED_INITIALIZER;
300+
engineMode:(G8OCREngineMode)engineMode;
301+
302+
/**
303+
* Initialize Tesseract with the provided language, config settings, data path and engine mode.
304+
*
305+
* @param language The language to use in recognition. See
306+
* `language`.
307+
* @param configDictionary A dictionary of config variables to set.
308+
* @param configFileNames An array of file names containing key-value
309+
* config pairs. Config settings can be set at
310+
* initialization or run-time. Furthermore, they
311+
* could be specified at the same time, in which
312+
* case Tesseract will get variables from every
313+
* config file as well as the dictionary.
314+
* The config files must exist in one of two
315+
* possible folders: tessdata/tessconfigs or
316+
* tessdata/configs.
317+
* @param absoluteDataPath If specified, Tesseract will be initialized to
318+
* use this path as the path prefix for the
319+
* tessdata folder. Consequently, you must have a
320+
* folder named "tessdata" in this path for
321+
* Tesseract to initialize properly (unless you
322+
* also set the `copyFilesFromResources` flag to
323+
* YES, in which case the tessdata folder will
324+
* be created for you in this path). If nil, the
325+
* application bundle will be used instead as the
326+
* path prefix for the tessdata folder.
327+
* @param engineMode The engine mode to use in recognition. See
328+
* `engineMode`.
329+
* @param copyFilesFromResources Flag to determine if Tesseract should copy the
330+
* whole contents of the tessdata folder in the
331+
* application bundle to the
332+
* absoluteDataPath/tessdata directory. Ignored when absoluteDataPath is nil.
333+
*
334+
*
335+
* @return The initialized Tesseract object, or `nil` if there was an error.
336+
*/
337+
- (id)initWithLanguage:(NSString *)language
338+
configDictionary:(NSDictionary *)configDictionary
339+
configFileNames:(NSArray *)configFileNames
340+
absoluteDataPath:(NSString *)absoluteDataPath
341+
engineMode:(G8OCREngineMode)engineMode
342+
copyFilesFromResources:(BOOL)copyFilesFromResources NS_DESIGNATED_INITIALIZER;
301343

302344
/**
303345
* Set a Tesseract variable. See G8TesseractParameters.h for the available

TesseractOCR/G8Tesseract.mm

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -89,20 +89,47 @@ - (id)initWithLanguage:(NSString *)language engineMode:(G8OCREngineMode)engineMo
8989
{
9090
return [self initWithLanguage:language configDictionary:nil configFileNames:nil cachesRelatedDataPath:nil engineMode:engineMode];
9191
}
92-
9392
- (id)initWithLanguage:(NSString *)language
9493
configDictionary:(NSDictionary *)configDictionary
9594
configFileNames:(NSArray *)configFileNames
9695
cachesRelatedDataPath:(NSString *)cachesRelatedPath
9796
engineMode:(G8OCREngineMode)engineMode
97+
{
98+
NSString *absoluteDataPath = nil;
99+
if (cachesRelatedPath) {
100+
// Configure Tesseract to look for trained data in the tessdata folder inside the Caches directory
101+
NSArray *cachesPaths = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
102+
NSString *cachesPath = cachesPaths.firstObject;
103+
104+
absoluteDataPath = [cachesPath stringByAppendingPathComponent:cachesRelatedPath].copy;
105+
}
106+
return [self initWithLanguage:language
107+
configDictionary:configDictionary
108+
configFileNames:configFileNames
109+
absoluteDataPath:absoluteDataPath
110+
engineMode:engineMode
111+
copyFilesFromResources:cachesRelatedPath != nil];
112+
}
113+
114+
- (id)initWithLanguage:(NSString *)language
115+
configDictionary:(NSDictionary *)configDictionary
116+
configFileNames:(NSArray *)configFileNames
117+
absoluteDataPath:(NSString *)absoluteDataPath
118+
engineMode:(G8OCREngineMode)engineMode
119+
copyFilesFromResources:(BOOL)copyFilesFromResources
98120
{
99121
self = [super init];
100122
if (self != nil) {
101123
if (configFileNames) {
102124
NSAssert([configFileNames isKindOfClass:[NSArray class]], @"Error! configFileNames should be of type NSArray");
103125
}
104-
105-
_absoluteDataPath = [cachesRelatedPath copy];
126+
if (copyFilesFromResources && absoluteDataPath != nil) {
127+
BOOL moveDataSuccess = [self moveTessdataToDirectoryIfNecessary:absoluteDataPath];
128+
if (moveDataSuccess == NO) {
129+
return nil;
130+
}
131+
}
132+
_absoluteDataPath = [absoluteDataPath copy];
106133
_language = [language copy];
107134
_configDictionary = configDictionary;
108135
_configFileNames = configFileNames;
@@ -116,19 +143,7 @@ - (id)initWithLanguage:(NSString *)language
116143
_monitor->cancel = (CANCEL_FUNC)[self methodForSelector:@selector(tesseractCancelCallbackFunction:)];
117144
_monitor->cancel_this = (__bridge void*)self;
118145

119-
if (self.absoluteDataPath != nil) {
120-
// config Tesseract to search trainedData in tessdata folder of the Caches folder
121-
NSArray *cachesPaths = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
122-
NSString *cachesPath = cachesPaths.firstObject;
123-
124-
_absoluteDataPath = [cachesPath stringByAppendingPathComponent:_absoluteDataPath].copy;
125-
126-
BOOL success = [self moveTessdataToCachesDirectoryIfNecessary];
127-
if (success == NO) {
128-
return nil;
129-
}
130-
}
131-
else {
146+
if (self.absoluteDataPath == nil) {
132147
// Configure Tesseract to look for trained data in the tessdata folder of the application bundle
133148
_absoluteDataPath = [NSString stringWithFormat:@"%@", [NSString stringWithString:[NSBundle mainBundle].bundlePath]].copy;
134149
}
@@ -208,14 +223,14 @@ - (BOOL)resetEngine
208223
return isInitDone;
209224
}
210225

211-
- (BOOL)moveTessdataToCachesDirectoryIfNecessary
226+
- (BOOL)moveTessdataToDirectoryIfNecessary:(NSString *)directoryPath
212227
{
213228
NSFileManager *fileManager = [NSFileManager defaultManager];
214229

215230
// Useful paths
216231
NSString *tessdataFolderName = @"tessdata";
217232
NSString *tessdataPath = [[NSBundle mainBundle].resourcePath stringByAppendingPathComponent:tessdataFolderName];
218-
NSString *destinationPath = [self.absoluteDataPath stringByAppendingPathComponent:tessdataFolderName];
233+
NSString *destinationPath = [directoryPath stringByAppendingPathComponent:tessdataFolderName];
219234
NSLog(@"Tesseract destination path: %@", destinationPath);
220235

221236
if ([fileManager fileExistsAtPath:destinationPath] == NO) {

TestsProject/TestsProjectTests/InitializationTests.m

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ - (BOOL)resetEngine;
2424

2525
NSFileManager *fileManager = [NSFileManager defaultManager];
2626
NSString *resourcePath = [NSBundle mainBundle].resourcePath;
27+
NSString *customDirectoryPath = [[NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDomainMask, YES) firstObject] stringByAppendingPathComponent:@"testDirectory"];
2728
NSString *tessdataFolderName = @"tessdata";
2829
NSString *tessdataFolderPathFromTheBundle = [[resourcePath stringByAppendingPathComponent:tessdataFolderName] stringByAppendingString:@"/"];
2930
NSString *debugConfigsFileName = @"debugConfigs.txt";
@@ -85,6 +86,120 @@ - (BOOL)resetEngine;
8586
[[NSNotificationCenter defaultCenter] postNotificationName:UIApplicationDidReceiveMemoryWarningNotification object:nil];
8687
});
8788
});
89+
90+
NSString *customTessDataPath = [customDirectoryPath stringByAppendingPathComponent:@"tessdata"];
91+
void(^cleanCustomTessdataFolder)() = ^{
92+
NSError *error = nil;
93+
BOOL fileIsRemoved = [fileManager removeItemAtPath:customTessDataPath error:&error];
94+
if (error != nil) {
95+
NSLog(@"Error deleting tessdata folder from the custom directory: %@", error);
96+
}
97+
NSAssert(fileIsRemoved == YES, @"Error cleaning tessdata from the custom directory");
98+
99+
// check tessdata folder was deleted
100+
NSArray *directoryContent = [fileManager contentsOfDirectoryAtPath:customDirectoryPath error:&error];
101+
if (error != nil) {
102+
NSLog(@"Error getting the contents of the custom directory: %@", error);
103+
}
104+
NSAssert([directoryContent containsObject:customDirectoryPath] == NO, @"Assert! Tessdata path was not removed from the Caches folder");
105+
};
106+
107+
BOOL(^copyDataToCustomDirectory)() = ^{
108+
{
109+
// Useful paths
110+
NSString *tessdataFolderName = @"tessdata";
111+
NSString *tessdataPath = [[NSBundle mainBundle].resourcePath stringByAppendingPathComponent:tessdataFolderName];
112+
NSString *destinationPath = customTessDataPath;
113+
NSLog(@"Tesseract destination path: %@", destinationPath);
114+
115+
if ([fileManager fileExistsAtPath:destinationPath] == NO) {
116+
NSError *error = nil;
117+
BOOL res = [fileManager createDirectoryAtPath:destinationPath withIntermediateDirectories:YES attributes:nil error:&error];
118+
if (res == NO) {
119+
NSLog(@"Error creating folder %@: %@", destinationPath, error);
120+
return NO;
121+
}
122+
}
123+
124+
BOOL result = YES;
125+
NSError *error = nil;
126+
NSArray *files = [fileManager contentsOfDirectoryAtPath:tessdataPath error:&error];
127+
if (files == nil) {
128+
NSLog(@"ERROR! %@", error.description);
129+
result = NO;
130+
} else {
131+
for (NSString *filename in files) {
132+
133+
NSString *destinationFileName = [destinationPath stringByAppendingPathComponent:filename];
134+
if (![fileManager fileExistsAtPath:destinationFileName]) {
135+
136+
NSString *filePath = [tessdataPath stringByAppendingPathComponent:filename];
137+
138+
// delete broken symlinks first
139+
[fileManager removeItemAtPath:destinationFileName error:&error];
140+
141+
// then recreate it
142+
error = nil; // ignore the previous error, which can happen if we tried to remove a symlink that doesn't exist
143+
BOOL res = [fileManager createSymbolicLinkAtPath:destinationFileName
144+
withDestinationPath:filePath
145+
error:&error];
146+
if (res == NO) {
147+
NSLog(@"Error creating symlink %@: %@", destinationPath, error);
148+
result = NO;
149+
}
150+
}
151+
}
152+
}
153+
return result;
154+
}
155+
};
156+
157+
context(@"initialize with absoluteDataPath", ^{
158+
159+
it(@"Should initialize simple", ^{
160+
G8Tesseract *tesseract = [[G8Tesseract alloc] initWithLanguage:kG8Languages configDictionary:nil configFileNames:nil absoluteDataPath:nil engineMode:G8OCREngineModeTesseractOnly copyFilesFromResources:NO];
161+
[[tesseract shouldNot] beNil];
162+
163+
[[tesseract.absoluteDataPath should] equal:resourcePath];
164+
165+
tesseract = [[G8Tesseract alloc] initWithLanguage:kG8Languages configDictionary:nil configFileNames:nil absoluteDataPath:customDirectoryPath engineMode:G8OCREngineModeTesseractOnly copyFilesFromResources:NO];
166+
[[tesseract should] beNil];
167+
168+
BOOL isDirectory = NO;
169+
[[theValue([fileManager fileExistsAtPath:customTessDataPath isDirectory:&isDirectory]) should] beNo];
170+
[[theValue(isDirectory) should] beNo];
171+
172+
copyDataToCustomDirectory();
173+
174+
isDirectory = NO;
175+
[[theValue([fileManager fileExistsAtPath:customTessDataPath isDirectory:&isDirectory]) should] beYes];
176+
[[theValue(isDirectory) should] beYes];
177+
178+
tesseract = [[G8Tesseract alloc] initWithLanguage:kG8Languages configDictionary:nil configFileNames:nil absoluteDataPath:customDirectoryPath engineMode:G8OCREngineModeTesseractOnly copyFilesFromResources:NO];
179+
[[tesseract shouldNot] beNil];
180+
181+
[[tesseract.absoluteDataPath should] equal:customDirectoryPath];
182+
183+
isDirectory = NO;
184+
[[theValue([fileManager fileExistsAtPath:customTessDataPath isDirectory:&isDirectory]) should] beYes];
185+
[[theValue(isDirectory) should] beYes];
186+
187+
cleanCustomTessdataFolder();
188+
189+
tesseract = [[G8Tesseract alloc] initWithLanguage:kG8Languages configDictionary:nil configFileNames:nil absoluteDataPath:customDirectoryPath engineMode:G8OCREngineModeTesseractOnly copyFilesFromResources:YES];
190+
[[tesseract shouldNot] beNil];
191+
192+
[[tesseract.absoluteDataPath should] equal:customDirectoryPath];
193+
194+
isDirectory = NO;
195+
[[theValue([fileManager fileExistsAtPath:[customDirectoryPath stringByAppendingPathComponent:@"tessdata"] isDirectory:&isDirectory]) should] beYes];
196+
[[theValue(isDirectory) should] beYes];
197+
198+
cleanCustomTessdataFolder();
199+
200+
});
201+
202+
});
88203

89204
context(@"nil cachesRelatedDataPath", ^{
90205

0 commit comments

Comments
 (0)