-
-
Notifications
You must be signed in to change notification settings - Fork 2.9k
OCR integration #13313
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
OCR integration #13313
Changes from 1 commit
a51e3b0
aca504a
48ffb06
5a256ae
f80cec8
db1f577
4020d3a
4987978
9842dd4
8b133e6
42704ea
a64e1ea
6069bc1
5734252
ab98a3a
62dce25
0949300
e4e45f3
15272a6
14332af
5488deb
26a8500
e0da447
f1a06ac
42d34ca
3ffa1c1
59d87a2
bc36a94
e7a043d
caa48f1
9b51b23
c115803
f16c1f4
8c51016
5843c69
1bd2e5b
c7fcc6b
a8bab25
9b9c7b7
edc0343
695e2b4
e5e651e
9ffdbff
facb463
265a312
e4d16c2
cd52669
2eb2df2
809e5a1
d0a20e7
ce0328d
da6b8c8
a4e5d20
af085b9
dd8d211
a3a90a8
e7dba5e
d13ea24
14cf72d
c5b0860
66bfdd6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -156,40 +156,35 @@ private void configureTessdata() { | |
*/ | ||
private boolean setTessdataPath(String pathStr) { | ||
try { | ||
Path path = Path.of(pathStr); | ||
|
||
// Resolve symbolic links to get the real path | ||
Path realPath = path.toRealPath(); | ||
LOGGER.debug("Original path: {}, Real path: {}", path, realPath); | ||
|
||
// Check if this is the tessdata directory itself | ||
if (realPath.getFileName() != null && realPath.getFileName().toString().equals("tessdata")) { | ||
Path engData = realPath.resolve("eng.traineddata"); | ||
LOGGER.debug("Checking for eng.traineddata at: {}", engData); | ||
if (Files.exists(realPath) && Files.isDirectory(realPath) && Files.exists(engData)) { | ||
// Tesseract expects the parent of tessdata | ||
String parentPath = realPath.getParent().toString(); | ||
LOGGER.debug("Setting datapath to parent: {}", parentPath); | ||
tesseract.setDatapath(parentPath); | ||
return true; | ||
} | ||
} else { | ||
// Check if this is the parent of tessdata | ||
Path tessdata = realPath.resolve("tessdata"); | ||
Path engData = tessdata.resolve("eng.traineddata"); | ||
LOGGER.debug("Checking tessdata at: {} and eng.traineddata at: {}", tessdata, engData); | ||
if (Files.exists(tessdata) && Files.isDirectory(tessdata) && Files.exists(engData)) { | ||
LOGGER.debug("Setting datapath to: {}", realPath); | ||
tesseract.setDatapath(realPath.toString()); | ||
Path path = Path.of(pathStr).toRealPath(); | ||
LOGGER.debug("Original path: {}, Real path: {}", pathStr, path); | ||
|
||
// ─── Case1: caller already gave the tessdata folder ──────────────────── | ||
if ("tessdata".equals(path.getFileName().toString())) { | ||
Path engData = path.resolve("eng.traineddata"); | ||
LOGGER.debug("Looking for eng.traineddata at {}", engData); | ||
if (Files.isRegularFile(engData)) { | ||
tesseract.setDatapath(path.toString()); | ||
return true; | ||
} | ||
} | ||
|
||
// ─── Case2: caller gave parent directory ────────────────────────────── | ||
Kaan0029 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Path tessdata = path.resolve("tessdata"); | ||
Path engData = tessdata.resolve("eng.traineddata"); | ||
LOGGER.debug("Looking for tessdata at {}, eng.traineddata at {}", tessdata, engData); | ||
if (Files.isDirectory(tessdata) && Files.isRegularFile(engData)) { | ||
tesseract.setDatapath(tessdata.toString()); | ||
return true; | ||
} | ||
|
||
} catch (Exception e) { | ||
LOGGER.debug("Invalid path: {}", pathStr, e); | ||
LOGGER.debug("Invalid tessdata path: {}", pathStr, e); | ||
} | ||
return false; | ||
return false; // nothing usable found | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Catching generic Exception is too broad and masks specific issues. It should catch specific exceptions like IOException or SecurityException for better error handling. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, trag bot probably selected wrong lines, but overall he's right, the |
||
|
||
/** | ||
* Gets the list of default tessdata paths based on the operating system. | ||
*/ | ||
|
Uh oh!
There was an error while loading. Please reload this page.