Skip to content
This repository was archived by the owner on Jun 14, 2018. It is now read-only.

Commit ce23c24

Browse files
authored
Merge pull request #88 from ZoranPavlovic/feature/minor_changes
Feature/minor changes
2 parents: 1ba207c + 45361a8 | commit ce23c24

File tree

3 files changed: 22 additions (+22) and 31 deletions (-31)

src/pyocr/libtesseract/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,9 @@
2020
from ..error import TesseractError
2121
from ..util import digits_only
2222

23+
import logging
24+
logger = logging.getLogger(__name__)
25+
2326

2427
__all__ = [
2528
'can_detect_orientation',
@@ -212,11 +215,13 @@ def is_available():
212215
if not available:
213216
return False
214217
version = get_version()
218+
215219
# C-API with Tesseract <= 3.02 segfaults sometimes
216220
# (seen with Debian stable + Paperwork)
217221
# not tested with 3.03
218222
if (version[0] < 3 or
219223
(version[0] == 3 and version[1] < 4)):
224+
logger.warning("Unsupported version [%s]" % ".".join([str(r) for r in version]))
220225
return False
221226
return True
222227

src/pyocr/libtesseract/tesseract_raw.py

Lines changed: 10 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -44,12 +44,14 @@
4444

4545
g_libtesseract = None
4646

47+
lib_load_errors = []
4748
for libname in libnames:
4849
try:
4950
g_libtesseract = ctypes.cdll.LoadLibrary(libname)
51+
lib_load_errors = []
5052
break
51-
except OSError:
52-
pass
53+
except OSError as ex:
54+
lib_load_errors.append((libname, ex.message))
5355

5456

5557
class PageSegMode(object):
@@ -353,23 +355,20 @@ def init(lang=None):
353355

354356

355357
def cleanup(handle):
358+
assert(g_libtesseract)
356359
g_libtesseract.TessBaseAPIDelete(ctypes.c_void_p(handle))
357360

358361

359362
def is_available():
360-
global g_libtesseract
361363
return g_libtesseract is not None
362364

363365

364366
def get_version():
365-
global g_libtesseract
366367
assert(g_libtesseract)
367-
368368
return g_libtesseract.TessVersion().decode("utf-8")
369369

370370

371371
def get_available_languages(handle):
372-
global g_libtesseract
373372
assert(g_libtesseract)
374373

375374
langs = []
@@ -385,7 +384,6 @@ def get_available_languages(handle):
385384

386385

387386
def set_is_numeric(handle, mode):
388-
global g_libtesseract
389387
assert(g_libtesseract)
390388

391389
if mode:
@@ -401,7 +399,6 @@ def set_is_numeric(handle, mode):
401399

402400

403401
def set_debug_file(handle, filename):
404-
global g_libtesseract
405402
assert(g_libtesseract)
406403

407404
if not isinstance(filename, bytes):
@@ -415,7 +412,6 @@ def set_debug_file(handle, filename):
415412

416413

417414
def set_page_seg_mode(handle, mode):
418-
global g_libtesseract
419415
assert(g_libtesseract)
420416

421417
g_libtesseract.TessBaseAPISetPageSegMode(
@@ -424,14 +420,12 @@ def set_page_seg_mode(handle, mode):
424420

425421

426422
def init_for_analyse_page(handle):
427-
global g_libtesseract
428423
assert(g_libtesseract)
429424

430425
g_libtesseract.TessBaseAPIInitForAnalysePage(ctypes.c_void_p(handle))
431426

432427

433428
def set_image(handle, image):
434-
global g_libtesseract
435429
assert(g_libtesseract)
436430

437431
image = image.convert("RGB")
@@ -451,7 +445,6 @@ def set_image(handle, image):
451445

452446

453447
def recognize(handle):
454-
global g_libtesseract
455448
assert(g_libtesseract)
456449

457450
return g_libtesseract.TessBaseAPIRecognize(
@@ -460,35 +453,32 @@ def recognize(handle):
460453

461454

462455
def analyse_layout(handle):
463-
global g_libtesseract
464456
assert(g_libtesseract)
465457

466458
return g_libtesseract.TessBaseAPIAnalyseLayout(ctypes.c_void_p(handle))
467459

468460

469461
def get_utf8_text(handle):
462+
assert(g_libtesseract)
470463
ptr = g_libtesseract.TessBaseAPIGetUTF8Text(ctypes.c_void_p(handle))
471464
val = ctypes.cast(ptr, ctypes.c_char_p).value.decode("utf-8")
472465
g_libtesseract.TessDeleteText(ptr)
473466
return val
474467

475468

476469
def page_iterator_delete(iterator):
477-
global g_libtesseract
478470
assert(g_libtesseract)
479471

480472
return g_libtesseract.TessPageIteratorDelete(ctypes.c_void_p(iterator))
481473

482474

483475
def page_iterator_next(iterator, level):
484-
global g_libtesseract
485476
assert(g_libtesseract)
486477

487478
return g_libtesseract.TessPageIteratorNext(ctypes.c_void_p(iterator), level)
488479

489480

490481
def page_iterator_is_at_beginning_of(iterator, level):
491-
global g_libtesseract
492482
assert(g_libtesseract)
493483

494484
return g_libtesseract.TessPageIteratorIsAtBeginningOf(
@@ -497,7 +487,6 @@ def page_iterator_is_at_beginning_of(iterator, level):
497487

498488

499489
def page_iterator_is_at_final_element(iterator, level, element):
500-
global g_libtesseract
501490
assert(g_libtesseract)
502491

503492
return g_libtesseract.TessPageIteratorIsAtFinalElement(
@@ -506,7 +495,6 @@ def page_iterator_is_at_final_element(iterator, level, element):
506495

507496

508497
def page_iterator_block_type(iterator):
509-
global g_libtesseract
510498
assert(g_libtesseract)
511499

512500
return g_libtesseract.TessPageIteratorBlockType(
@@ -515,7 +503,6 @@ def page_iterator_block_type(iterator):
515503

516504

517505
def page_iterator_bounding_box(iterator, level):
518-
global g_libtesseract
519506
assert(g_libtesseract)
520507

521508
left = ctypes.c_int(0)
@@ -541,7 +528,6 @@ def page_iterator_bounding_box(iterator, level):
541528

542529

543530
def page_iterator_orientation(iterator):
544-
global g_libtesseract
545531
assert(g_libtesseract)
546532

547533
orientation = ctypes.c_int(0)
@@ -566,15 +552,13 @@ def page_iterator_orientation(iterator):
566552

567553

568554
def get_iterator(handle):
569-
global g_libtesseract
570555
assert(g_libtesseract)
571556

572557
i = g_libtesseract.TessBaseAPIGetIterator(ctypes.c_void_p(handle))
573558
return i
574559

575560

576561
def result_iterator_get_page_iterator(res_iterator):
577-
global g_libtesseract
578562
assert(g_libtesseract)
579563

580564
return g_libtesseract.TessResultIteratorGetPageIterator(
@@ -583,6 +567,7 @@ def result_iterator_get_page_iterator(res_iterator):
583567

584568

585569
def result_iterator_get_utf8_text(iterator, level):
570+
assert(g_libtesseract)
586571
ptr = g_libtesseract.TessResultIteratorGetUTF8Text(
587572
ctypes.c_void_p(iterator), level
588573
)
@@ -592,7 +577,9 @@ def result_iterator_get_utf8_text(iterator, level):
592577
g_libtesseract.TessDeleteText(ptr)
593578
return val
594579

580+
595581
def result_iterator_get_confidence(iterator, level):
582+
assert(g_libtesseract)
596583
ptr = g_libtesseract.TessResultIteratorConfidence(
597584
ctypes.c_void_p(iterator), level
598585
)
@@ -601,8 +588,8 @@ def result_iterator_get_confidence(iterator, level):
601588
val = ctypes.c_float(ptr).value
602589
return val
603590

591+
604592
def detect_os(handle):
605-
global g_libtesseract
606593
assert(g_libtesseract)
607594

608595
# Use the new API function if it is available, because since Tesseract
@@ -642,7 +629,6 @@ def detect_os(handle):
642629

643630

644631
def set_input_name(handle, input_file):
645-
global g_libtesseract
646632
assert(g_libtesseract)
647633

648634
g_libtesseract.TessBaseAPISetInputName(
@@ -652,7 +638,6 @@ def set_input_name(handle, input_file):
652638

653639

654640
def init_pdf_renderer(handle, output_file, textonly):
655-
global g_libtesseract
656641
assert(g_libtesseract)
657642

658643
tessdata_dir = g_libtesseract.TessBaseAPIGetDatapath(handle)
@@ -667,7 +652,6 @@ def init_pdf_renderer(handle, output_file, textonly):
667652

668653

669654
def begin_document(renderer, doc_name):
670-
global g_libtesseract
671655
assert(g_libtesseract)
672656

673657
g_libtesseract.TessResultRendererBeginDocument(
@@ -677,7 +661,6 @@ def begin_document(renderer, doc_name):
677661

678662

679663
def add_renderer_image(handle, renderer):
680-
global g_libtesseract
681664
assert(g_libtesseract)
682665

683666
g_libtesseract.TessResultRendererAddImage(
@@ -687,7 +670,6 @@ def add_renderer_image(handle, renderer):
687670

688671

689672
def end_document(renderer):
690-
global g_libtesseract
691673
assert(g_libtesseract)
692674

693675
g_libtesseract.TessResultRendererEndDocument(

src/pyocr/tesseract.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,10 @@ def detect_orientation(image, lang=None):
202202

203203
original_output = original_output.decode("utf-8")
204204
original_output = original_output.strip()
205+
206+
if "Could not initialize tesseract" in original_output:
207+
raise TesseractError(-1, "Error initializing tesseract: %s" % original_output)
208+
205209
try:
206210
output = original_output.split("\n")
207211
output = [line.split(": ", 1) for line in output if (": " in line)]
@@ -214,9 +218,9 @@ def detect_orientation(image, lang=None):
214218
'angle': angle,
215219
'confidence': float(output['Orientation confidence']),
216220
}
217-
except:
218-
raise TesseractError(-1, "No script found in image (%s)"
219-
% original_output)
221+
except Exception as ex:
222+
raise TesseractError(-1, "No script found in image (%s - %s)"
223+
% (ex.message, original_output))
220224

221225

222226
def get_name():

0 commit comments

Comments
 (0)