Skip to content

Commit 176bf3d

Browse files
authored
Merge pull request #3240 from opendatalab/release-2.1.10
Release 2.1.10
2 parents a67ff87 + a50616b commit 176bf3d

File tree

4 files changed

+129
-31
lines changed

4 files changed

+129
-31
lines changed

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,13 @@
4343
</div>
4444

4545
# Changelog
46-
- 2025/07/30 version 2.1.9 Released
46+
- 2025/08/01 2.1.10 Released
47+
- Fixed an issue in the `pipeline` backend where block overlap caused the parsing results to deviate from expectations #3232
48+
- 2025/07/30 2.1.9 Released
4749
- `transformers` 4.54.1 version adaptation
48-
- 2025/07/28 version 2.1.8 Released
50+
- 2025/07/28 2.1.8 Released
4951
- `sglang` 0.4.9.post5 version adaptation
50-
- 2025/07/27 version 2.1.7 Released
52+
- 2025/07/27 2.1.7 Released
5153
- `transformers` 4.54.0 version adaptation
5254
- 2025/07/26 2.1.6 Released
5355
- Fixed table parsing issues in handwritten documents when using `vlm` backend

README_zh-CN.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
</div>
4444

4545
# 更新记录
46+
- 2025/08/01 2.1.10 发布
47+
- 修复`pipeline`后端因block覆盖导致的解析结果与预期不符 #3232
4648
- 2025/07/30 2.1.9 发布
4749
- `transformers` 4.54.1 版本适配
4850
- 2025/07/28 2.1.8 发布

mineru/backend/pipeline/model_json_to_middle_json.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -55,25 +55,34 @@ def page_model_info_to_page_info(page_model_info, image_dict, page, image_writer
5555
"""某些图可能是文本块,通过简单的规则判断一下"""
5656
if len(maybe_text_image_blocks) > 0:
5757
for block in maybe_text_image_blocks:
58-
span_in_block_list = []
59-
for span in spans:
60-
if span['type'] == 'text' and calculate_overlap_area_in_bbox1_area_ratio(span['bbox'], block['bbox']) > 0.7:
61-
span_in_block_list.append(span)
62-
if len(span_in_block_list) > 0:
63-
# span_in_block_list中所有bbox的面积之和
64-
spans_area = sum((span['bbox'][2] - span['bbox'][0]) * (span['bbox'][3] - span['bbox'][1]) for span in span_in_block_list)
65-
# 求ocr_res_area和res的面积的比值
66-
block_area = (block['bbox'][2] - block['bbox'][0]) * (block['bbox'][3] - block['bbox'][1])
67-
if block_area > 0:
68-
ratio = spans_area / block_area
69-
if ratio > 0.25 and ocr_enable:
70-
# 移除block的group_id
71-
block.pop('group_id', None)
72-
# 符合文本图的条件就把块加入到文本块列表中
73-
text_blocks.append(block)
74-
else:
75-
# 如果不符合文本图的条件,就把块加回到图片块列表中
76-
img_body_blocks.append(block)
58+
should_add_to_text_blocks = False
59+
60+
if ocr_enable:
61+
# 找到与当前block重叠的text spans
62+
span_in_block_list = [
63+
span for span in spans
64+
if span['type'] == 'text' and
65+
calculate_overlap_area_in_bbox1_area_ratio(span['bbox'], block['bbox']) > 0.7
66+
]
67+
68+
if len(span_in_block_list) > 0:
69+
# 计算spans总面积
70+
spans_area = sum(
71+
(span['bbox'][2] - span['bbox'][0]) * (span['bbox'][3] - span['bbox'][1])
72+
for span in span_in_block_list
73+
)
74+
75+
# 计算block面积
76+
block_area = (block['bbox'][2] - block['bbox'][0]) * (block['bbox'][3] - block['bbox'][1])
77+
78+
# 判断是否符合文本图条件
79+
if block_area > 0 and spans_area / block_area > 0.25:
80+
should_add_to_text_blocks = True
81+
82+
# 根据条件决定添加到哪个列表
83+
if should_add_to_text_blocks:
84+
block.pop('group_id', None) # 移除group_id
85+
text_blocks.append(block)
7786
else:
7887
img_body_blocks.append(block)
7988

mineru/utils/model_utils.py

Lines changed: 94 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -131,15 +131,10 @@ def merge_high_iou_tables(table_res_list, layout_res, table_indices, iou_thresho
131131

132132
# Create merged table
133133
merged_table = table_res_list[i].copy()
134-
merged_table['poly'][0] = union_xmin
135-
merged_table['poly'][1] = union_ymin
136-
merged_table['poly'][2] = union_xmax
137-
merged_table['poly'][3] = union_ymin
138-
merged_table['poly'][4] = union_xmax
139-
merged_table['poly'][5] = union_ymax
140-
merged_table['poly'][6] = union_xmin
141-
merged_table['poly'][7] = union_ymax
142-
134+
merged_table['poly'] = [
135+
union_xmin, union_ymin, union_xmax, union_ymin,
136+
union_xmax, union_ymax, union_xmin, union_ymax
137+
]
143138
# Update layout_res
144139
to_remove = [table_indices[j], table_indices[i]]
145140
for idx in sorted(to_remove, reverse=True):
@@ -253,6 +248,83 @@ def remove_overlaps_min_blocks(res_list):
253248
return res_list, need_remove
254249

255250

251+
def remove_overlaps_low_confidence_blocks(combined_res_list, overlap_threshold=0.8):
    """
    Resolve "one large block wrapping several small blocks" conflicts by score.

    For every block that has three or more other blocks inside it (as decided
    by ``is_inside`` with ``overlap_threshold``), the block's score is compared
    with the mean score of the blocks found inside it:

    * container score strictly higher: every inner block is scheduled for
      removal and the container's ``poly`` is enlarged in place so that it
      covers all of them;
    * otherwise: the container itself is scheduled for removal.

    Nothing is removed from ``combined_res_list`` here; the blocks to drop are
    only collected and returned.

    Parameters:
        combined_res_list (list): Blocks as dictionaries. Each needs a 'poly'
            sequence whose indices 0/1 are read as (xmin, ymin) and 4/5 as
            (xmax, ymax); indices 0-7 are written when a block is enlarged.
            An optional 'score' entry holds the confidence (0.5 if missing).
        overlap_threshold (float): Forwarded to ``is_inside`` when testing
            whether one block lies inside another. Default is 0.8.

    Returns:
        list: The block dictionaries that should be removed.
    """
    # Cache integer corner coordinates, area and score for every block.
    block_info = []
    for block in combined_res_list:
        xmin, ymin = int(block['poly'][0]), int(block['poly'][1])
        xmax, ymax = int(block['poly'][4]), int(block['poly'][5])
        area = (xmax - xmin) * (ymax - ymin)
        score = block.get('score', 0.5)  # blocks without a score count as 0.5
        block_info.append((xmin, ymin, xmax, ymax, area, score, block))

    blocks_to_remove = []
    marked_indices = set()  # indices of blocks already scheduled for removal

    for i, (xmin, ymin, xmax, ymax, _, score, block) in enumerate(block_info):
        # A block that is already going away cannot act as a container.
        if i in marked_indices:
            continue

        # Blocks inside the current one, ignoring those already scheduled
        # for removal.
        blocks_inside = [
            (j, info[5], info[6])
            for j, info in enumerate(block_info)
            if i != j and j not in marked_indices
            and is_inside(info, block_info[i], overlap_threshold)
        ]

        # Only containers holding at least three inner blocks are arbitrated.
        if len(blocks_inside) < 3:
            continue

        avg_score = sum(s for _, s, _ in blocks_inside) / len(blocks_inside)

        if score > avg_score:
            # Keep the container: drop every inner block and grow the
            # container so that it covers all of them.
            new_xmin, new_ymin, new_xmax, new_ymax = xmin, ymin, xmax, ymax
            for j, _, j_block in blocks_inside:
                # `j` is guaranteed to be unmarked by the filter above, so the
                # block can be appended directly. Tracking by index (instead
                # of an equality scan over the result list) keeps this linear
                # and ensures two distinct blocks with identical content are
                # both scheduled for removal.
                blocks_to_remove.append(j_block)
                marked_indices.add(j)

                # Read the live polygon rather than the cached tuple: an inner
                # block may itself have been enlarged by an earlier iteration.
                j_poly = j_block['poly']
                new_xmin = min(new_xmin, int(j_poly[0]))
                new_ymin = min(new_ymin, int(j_poly[1]))
                new_xmax = max(new_xmax, int(j_poly[4]))
                new_ymax = max(new_ymax, int(j_poly[5]))

            # Write the enlarged axis-aligned box back into the container.
            # NOTE(review): block_info[i] is not refreshed here, so later
            # containment checks against this block still use its original
            # box - confirm this is intended.
            block['poly'][0] = block['poly'][6] = new_xmin
            block['poly'][1] = block['poly'][3] = new_ymin
            block['poly'][2] = block['poly'][4] = new_xmax
            block['poly'][5] = block['poly'][7] = new_ymax
        else:
            # Keep the inner blocks: drop the container instead.
            blocks_to_remove.append(block)
            marked_indices.add(i)

    return blocks_to_remove
326+
327+
256328
def get_res_list_from_layout_res(layout_res, iou_threshold=0.7, overlap_threshold=0.8, area_threshold=0.8):
257329
"""Extract OCR, table and other regions from layout results."""
258330
ocr_res_list = []
@@ -311,6 +383,19 @@ def get_res_list_from_layout_res(layout_res, iou_threshold=0.7, overlap_threshol
311383
del res['bbox']
312384
layout_res.remove(res)
313385

386+
# 检测大block内部是否包含多个小block, 合并ocr和table列表进行检测
387+
combined_res_list = ocr_res_list + filtered_table_res_list
388+
blocks_to_remove = remove_overlaps_low_confidence_blocks(combined_res_list, overlap_threshold)
389+
# 移除需要删除的blocks
390+
for block in blocks_to_remove:
391+
if block in ocr_res_list:
392+
ocr_res_list.remove(block)
393+
elif block in filtered_table_res_list:
394+
filtered_table_res_list.remove(block)
395+
# 同时从layout_res中删除
396+
if block in layout_res:
397+
layout_res.remove(block)
398+
314399
return ocr_res_list, filtered_table_res_list, single_page_mfdetrec_res
315400

316401

0 commit comments

Comments
 (0)