66from PIL import Image
77import io
88
9- import base64 , os
9+
10+ import base64 , json , os
1011from utils import check_ocr_box , get_yolo_model , get_caption_model_processor , get_som_labeled_img
1112import torch
1213from PIL import Image
@@ -68,12 +69,17 @@ def process(
6869 ocr_bbox_rslt , is_goal_filtered = check_ocr_box (image_save_path , display_img = False , output_bb_format = 'xyxy' , goal_filtering = None , easyocr_args = {'paragraph' : False , 'text_threshold' :0.9 }, use_paddleocr = use_paddleocr )
6970 text , ocr_bbox = ocr_bbox_rslt
7071 # print('prompt:', prompt)
71-
7272 dino_labled_img , label_coordinates , parsed_content_list = get_som_labeled_img (image_save_path , yolo_model , BOX_TRESHOLD = box_threshold , output_coord_in_ratio = True , ocr_bbox = ocr_bbox ,draw_bbox_config = draw_bbox_config , caption_model_processor = caption_model_processor , ocr_text = text ,iou_threshold = iou_threshold )
7373 image = Image .open (io .BytesIO (base64 .b64decode (dino_labled_img )))
74-
7574 print ('finish processing' )
76- return image , str (parsed_content_list ), str (label_coordinates )
75+ print ('finish processing' )
76+ output_dict = {
77+ "parsed_content_list" : parsed_content_list ,
78+ "label_coordinates" : label_coordinates ,
79+ }
80+ combined_text_output = json .dumps (output_dict , indent = 2 )
81+ return image , combined_text_output
82+
7783
7884
7985with gr .Blocks () as demo :
@@ -108,4 +114,4 @@ def process(
108114 )
109115
110116# demo.launch(debug=False, show_error=True, share=True)
111- demo .launch (share = True , server_port = 7861 , server_name = '0.0.0.0' )
117+ demo .launch (share = True , server_port = 7861 , server_name = '0.0.0.0' )
0 commit comments