Skip to content

Commit be04aff

Browse files
committed
Add paddleocr option
1 parent ba4e04f commit be04aff

File tree

2 files changed

+29
-11
lines changed

2 files changed

+29
-11
lines changed

gradio_demo.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,15 @@
5858
def process(
5959
image_input,
6060
box_threshold,
61-
iou_threshold
61+
iou_threshold,
62+
use_paddleocr
6263
) -> Optional[Image.Image]:
6364

6465
image_save_path = 'imgs/saved_image_demo.png'
6566
image_input.save(image_save_path)
6667
# import pdb; pdb.set_trace()
6768

68-
ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9})
69+
ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=use_paddleocr)
6970
text, ocr_bbox = ocr_bbox_rslt
7071
# print('prompt:', prompt)
7172
dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD = box_threshold, output_coord_in_ratio=True, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold)
@@ -88,6 +89,8 @@ def process(
8889
# IOU threshold used to remove bounding boxes that overlap heavily; the default is 0.1
8990
iou_threshold_component = gr.Slider(
9091
label='IOU Threshold', minimum=0.01, maximum=1.0, step=0.01, value=0.1)
92+
use_paddleocr_component = gr.Checkbox(
93+
label='Use PaddleOCR', default=True)
9194
submit_button_component = gr.Button(
9295
value='Submit', variant='primary')
9396
with gr.Column():
@@ -99,7 +102,8 @@ def process(
99102
inputs=[
100103
image_input_component,
101104
box_threshold_component,
102-
iou_threshold_component
105+
iou_threshold_component,
106+
use_paddleocr_component
103107
],
104108
outputs=[image_output_component, text_output_component]
105109
)

utils.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,17 @@
1818
# %matplotlib inline
1919
from matplotlib import pyplot as plt
2020
import easyocr
21+
from paddleocr import PaddleOCR
2122
reader = easyocr.Reader(['en'])
23+
paddle_ocr = PaddleOCR(
24+
lang='en', # other languages are also available
25+
use_angle_cls=False,
26+
use_gpu=False, # using CUDA here would conflict with PyTorch in the same process
27+
show_log=False,
28+
max_batch_size=1024,
29+
use_dilation=True, # improves accuracy
30+
det_db_score_mode='slow', # improves accuracy
31+
rec_batch_num=1024)
2232
import time
2333
import base64
2434

@@ -370,14 +380,18 @@ def get_xywh_yolo(input):
370380

371381

372382

373-
def check_ocr_box(image_path, display_img = True, output_bb_format='xywh', goal_filtering=None, easyocr_args=None):
374-
if easyocr_args is None:
375-
easyocr_args = {}
376-
result = reader.readtext(image_path, **easyocr_args)
377-
is_goal_filtered = False
378-
# print('goal filtering pred:', result[-5:])
379-
coord = [item[0] for item in result]
380-
text = [item[1] for item in result]
383+
def check_ocr_box(image_path, display_img = True, output_bb_format='xywh', goal_filtering=None, easyocr_args=None, use_paddleocr=True):
384+
if use_paddleocr:
385+
result = paddle_ocr.ocr(image_path, cls=False)[0]
386+
coord = [item[0] for item in result]
387+
text = [item[1][0] for item in result]
388+
else: # EasyOCR
389+
if easyocr_args is None:
390+
easyocr_args = {}
391+
result = reader.readtext(image_path, **easyocr_args)
392+
# print('goal filtering pred:', result[-5:])
393+
coord = [item[0] for item in result]
394+
text = [item[1] for item in result]
381395
# read the image using cv2
382396
if display_img:
383397
opencv_img = cv2.imread(image_path)

0 commit comments

Comments
 (0)