Skip to content

Commit b8b952c

Browse files
committed
undo changes to gradio_demo.py
1 parent 169dd20 commit b8b952c

File tree

1 file changed

+9
-25
lines changed

1 file changed

+9
-25
lines changed

gradio_demo.py

Lines changed: 9 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,15 @@
1-
from typing import Optional, Text, Tuple
1+
from typing import Optional
2+
23
import gradio as gr
4+
import numpy as np
35
import torch
46
from PIL import Image
57
import io
6-
import base64
7-
import json
8-
import numpy as np
98

9+
import base64, os
1010
from utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img
11-
12-
class NumpyEncoder(json.JSONEncoder):
13-
def default(self, obj):
14-
if isinstance(obj, np.ndarray):
15-
return obj.tolist()
16-
if isinstance(obj, np.float32):
17-
return float(obj)
18-
return json.JSONEncoder.default(self, obj)
11+
import torch
12+
from PIL import Image
1913

2014
yolo_model = get_yolo_model(model_path='weights/icon_detect/best.pt')
2115
caption_model_processor = get_caption_model_processor(model_name="florence2", model_name_or_path="weights/icon_caption_florence")
@@ -69,27 +63,17 @@ def process(
6963

7064
image_save_path = 'imgs/saved_image_demo.png'
7165
image_input.save(image_save_path)
66+
# import pdb; pdb.set_trace()
7267

7368
ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=use_paddleocr)
7469
text, ocr_bbox = ocr_bbox_rslt
70+
# print('prompt:', prompt)
7571

7672
dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD = box_threshold, output_coord_in_ratio=True, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold)
77-
78-
# Convert base64 string to PIL Image
7973
image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
8074

8175
print('finish processing')
82-
83-
# Combine text and bounding boxes into JSON-friendly format
84-
result = {
85-
"label_coordinates": label_coordinates,
86-
"parsed_content_list": parsed_content_list,
87-
}
88-
89-
# Convert to JSON string format for return using the custom encoder
90-
result_json = json.dumps(result, indent=4, cls=NumpyEncoder)
91-
92-
return image, result_json
76+
return image, str(parsed_content_list), str(label_coordinates)
9377

9478

9579
with gr.Blocks() as demo:

0 commit comments

Comments
 (0)