@@ -106,6 +106,24 @@ def process_paragraph(bboxes, texts, rect_box):
106106
107107 return up_table , down_table
108108
def process_whole_paragraph(general_ocr_res):
    """Arrange OCR texts and boxes into row/column grids.

    Args:
        general_ocr_res: Mapping with the keys ``"bboxes"``, ``"texts"`` and
            ``"row_col_info"``. The three values are iterated in lockstep, and
            each ``row_col_info`` entry is unpacked as a ``(row, col)`` pair
            used to index the grids.

    Returns:
        A ``(layout_text, layout_boxs)`` tuple of 2-D lists, both sized
        ``(max row + 1) x (max col + 1)``. Cells without an OCR entry hold
        ``""`` in ``layout_text`` and ``[]`` in ``layout_boxs``. When
        ``row_col_info`` is empty, both grids are empty lists.
    """
    boxes = general_ocr_res["bboxes"]
    texts = general_ocr_res["texts"]
    rowcol = general_ocr_res["row_col_info"]

    # ``default=-1`` yields a 0-sized grid for an empty OCR result instead of
    # letting ``max()`` raise ValueError on an empty sequence.
    max_row = max((entry[0] for entry in rowcol), default=-1) + 1
    max_col = max((entry[1] for entry in rowcol), default=-1) + 1

    layout_text = [["" for _ in range(max_col)] for _ in range(max_row)]
    layout_boxs = [[[] for _ in range(max_col)] for _ in range(max_row)]

    # Fill the layout grids; a later entry overwrites an earlier one that
    # targets the same cell.
    for (row, col), txt, box in zip(rowcol, texts, boxes):
        layout_text[row][col] = txt
        layout_boxs[row][col] = box
    return layout_text, layout_boxs
125+
126+
109127class OCRAgent (object ):
110128 def __init__ (self , ** kwargs ):
111129 self .ep = kwargs .get ("ocr_model_ep" )
@@ -122,67 +140,19 @@ def predict(self, inp) -> List[BlockInfo]:
122140 req_data = {"param" : params , "data" : [b64_image ]}
123141 try :
124142 r = self .client .post (url = self .ep , json = req_data , timeout = self .timeout )
125- # ret = convert_json(r.json())
126- # return ret
143+
127144 except requests .exceptions .Timeout :
128145 raise Exception (f"timeout in formula agent predict" )
129146 except Exception as e :
130147 raise Exception (f"exception in formula agent predict: [{ e } ]" )
131148
132- table_rect_box = []
133- table_md_str = ""
134- if 'table_result' in r ["data" ]["json" ] and len (r ["data" ]["json" ]["table_result" ][0 ]["cell_infos" ]):
135- table_result = r ["data" ]["json" ]["table_result" ][0 ]["cell_infos" ]
136- table_md_str ,table_rect_box = process_table (table_result )
137-
138- bboxes = r ["data" ]["json" ]["general_ocr_res" ]["bboxes" ]
139- texts = r ["data" ]["json" ]["general_ocr_res" ]["texts" ]
140-
141- up_table , down_table = process_paragraph (bboxes , texts , table_rect_box )
142- res = list ()
143- if len (table_rect_box ):
144- b1 = BlockInfo (
145- block = [],
146- block_text = table_md_str ,
147- block_no = 0 ,
148- ts = ["" ],
149- rs = [table_rect_box ],
150- layout_type = 1 ,
151- )
152- if len (up_table ["boxs" ]):
153- text = "" .join (up_table ['texts' ])
154- box = recalculate_xy (up_table ['boxs' ])
155- res .append (BlockInfo (
156- block = [],
157- block_text = text ,
158- block_no = 0 ,
159- ts = [text ],
160- rs = [box ],
161- layout_type = 0 ,
162- ))
163- res .append (b1 )
164- if len (down_table ["boxs" ]):
165- text = "" .join (down_table ['texts' ])
166- box = recalculate_xy (down_table ['boxs' ])
167- res .append (BlockInfo (
168- block = [],
169- block_text = text ,
170- block_no = 0 ,
171- ts = [text ],
172- rs = [box ],
173- layout_type = 0 ,
174- ))
175- return res
176- else :
177- text = "" .join (up_table ['texts' ])
178- box = recalculate_xy (up_table ['boxs' ])
179- b0 = BlockInfo (
180- block = [],
181- block_text = "abcdef" ,
182- block_no = 0 ,
183- ts = ["abc" , "def" ],
184- rs = [[0 , 0 , 100 , 30 ], [0 , 50 , 100 , 80 ]],
185- layout_type = 0 ,
186- )
187- return [b0 ]
188-
149+ layout_text , layout_boxs = process_whole_paragraph (r ["data" ]["json" ]["general_ocr_res" ])
150+ b0 = BlockInfo (
151+ block = [],
152+ block_text = '' .join (['' .join (text ) for text in layout_text ]),
153+ block_no = 0 ,
154+ ts = ['' .join (text ) for text in layout_text ],
155+ rs = ['' .join (text ) for text in layout_boxs ],
156+ layout_type = 0 ,
157+ )
158+ return [b0 ]
0 commit comments