Skip to content

Commit 4ccb5e0

Browse files
authored
Merge pull request #4193 from opendatalab/dev
Dev
2 parents da0cdfb + 6096359 commit 4ccb5e0

File tree

1 file changed

+6
-7
lines changed

1 file changed

+6
-7
lines changed

mineru/backend/vlm/vlm_middle_json_mkcontent.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -484,7 +484,7 @@ def merge_para_with_text_v2(para_block):
484 484   for line in para_block['lines']:
485 485   for span in line['spans']:
486 486   span_type = span['type']
487     - if span['content']:
    487 + if span.get("content", '').strip():
488 488   if para_type == BlockType.PHONETIC and span_type == ContentTypeV2.SPAN_TEXT:
489 489   span_type = ContentTypeV2.SPAN_PHONETIC
490 490   if span_type == ContentType.INLINE_EQUATION:
@@ -534,13 +534,12 @@ def union_make(pdf_info_dict: list,
534 534   output_content.append(para_content)
535 535   elif make_mode == MakeMode.CONTENT_LIST_V2:
536 536   # https://github.com/drunkpig/llm-webkit-mirror/blob/dev6/docs/specification/output_format/content_list_spec.md
537     - page_contents = []
538 537   para_blocks = (paras_of_layout or []) + (paras_of_discarded or [])
539     - if not para_blocks:
540     - continue
541     - for para_block in para_blocks:
542     - para_content = make_blocks_to_content_list_v2(para_block, img_buket_path, page_size)
543     - page_contents.append(para_content)
    538 + page_contents = []
    539 + if para_blocks:
    540 + for para_block in para_blocks:
    541 + para_content = make_blocks_to_content_list_v2(para_block, img_buket_path, page_size)
    542 + page_contents.append(para_content)
544 543   output_content.append(page_contents)
545 544
546 545   if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:

0 commit comments

Comments
 (0)