1 | | -from typing import Optional, Text, Tuple |
| 1 | +from typing import Optional |
| 2 | + |
2 | 3 | import gradio as gr |
| 4 | +import numpy as np |
3 | 5 | import torch |
4 | 6 | from PIL import Image |
5 | 7 | import io |
6 | | -import base64 |
7 | | -import json |
8 | | -import numpy as np |
9 | 8 |
| 9 | +import base64, os |
10 | 10 | from utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img |
11 | | - |
12 | | -class NumpyEncoder(json.JSONEncoder): |
13 | | - def default(self, obj): |
14 | | - if isinstance(obj, np.ndarray): |
15 | | - return obj.tolist() |
16 | | - if isinstance(obj, np.float32): |
17 | | - return float(obj) |
18 | | - return json.JSONEncoder.default(self, obj) |
| 11 | +import torch |
| 12 | +from PIL import Image |
19 | 13 |
20 | 14 | yolo_model = get_yolo_model(model_path='weights/icon_detect/best.pt') |
21 | 15 | caption_model_processor = get_caption_model_processor(model_name="florence2", model_name_or_path="weights/icon_caption_florence") |
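The first hunk tidies the imports and drops the NumpyEncoder helper. That class existed only so the old JSON return path could serialize the numpy arrays and float32 scores inside label_coordinates; with the second hunk below switching to plain str() returns, it becomes dead code. For reference, a minimal self-contained sketch of the pattern it implemented (the coords value is a made-up example, not data from the repo):

```python
import json
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    """Let json.dumps handle numpy containers by converting them to built-ins."""
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()          # arrays -> plain Python lists
        if isinstance(obj, np.float32):
            return float(obj)            # float32 scalars -> native floats
        return json.JSONEncoder.default(self, obj)

# How the old return path used it:
coords = {"0": np.array([0.12, 0.08, 0.30, 0.15], dtype=np.float32)}
print(json.dumps(coords, indent=4, cls=NumpyEncoder))
```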
@@ -69,27 +63,17 @@ def process( |
69 | 63 |
70 | 64 | image_save_path = 'imgs/saved_image_demo.png' |
71 | 65 | image_input.save(image_save_path) |
| 66 | + # import pdb; pdb.set_trace() |
72 | 67 |
73 | 68 | ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=use_paddleocr) |
74 | 69 | text, ocr_bbox = ocr_bbox_rslt |
| 70 | + # print('prompt:', prompt) |
75 | 71 |
76 | 72 | dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD = box_threshold, output_coord_in_ratio=True, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold) |
77 | | - |
78 | | - # Convert base64 string to PIL Image |
79 | 73 | image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img))) |
80 | 74 |
81 | 75 | print('finish processing') |
82 | | - |
83 | | - # Combine text and bounding boxes into JSON-friendly format |
84 | | - result = { |
85 | | - "label_coordinates": label_coordinates, |
86 | | - "parsed_content_list": parsed_content_list, |
87 | | - } |
88 | | - |
89 | | - # Convert to JSON string format for return using the custom encoder |
90 | | - result_json = json.dumps(result, indent=4, cls=NumpyEncoder) |
91 | | - |
92 | | - return image, result_json |
| 76 | + return image, str(parsed_content_list), str(label_coordinates) |
93 | 77 |
94 | 78 |
95 | 79 | with gr.Blocks() as demo: |
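Net effect of the second hunk: process() now returns three values, the SOM-annotated PIL image plus parsed_content_list and label_coordinates stringified with str(), instead of the old (image, JSON string) pair, so the Blocks UI that starts here needs a third output component wired to it. A rough sketch of that wiring, where every component name and the argument order of process are assumptions rather than code taken from this file:

```python
# Hypothetical wiring; component names and the process argument order are assumed.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type='pil', label='Upload image')
            box_threshold = gr.Slider(0.01, 1.0, value=0.05, label='Box threshold')
            iou_threshold = gr.Slider(0.01, 1.0, value=0.1, label='IOU threshold')
            use_paddleocr = gr.Checkbox(value=True, label='Use PaddleOCR')
            submit = gr.Button('Submit')
        with gr.Column():
            image_output = gr.Image(type='pil', label='Labeled image')
            parsed_output = gr.Textbox(label='Parsed screen elements')
            coords_output = gr.Textbox(label='Label coordinates')

    # One output component per value returned by process: (image, str, str).
    submit.click(
        fn=process,
        inputs=[image_input, box_threshold, iou_threshold, use_paddleocr],
        outputs=[image_output, parsed_output, coords_output],
    )

demo.launch()
```

One consequence is that the coordinates are no longer valid JSON; anything that previously called json.loads on the second return value will need updating to handle the plain stringified output.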