@@ -115,6 +115,9 @@ class BlockType:
115115 TableBody = "table_body"
116116 TableCaption = "table_caption"
117117 TableFootnote = "table_footnote"
118+ Code = "code"
119+ CodeBody = "code_body"
120+ CodeCaption = "code_caption"
118121
119122
120123class ContentType :
@@ -145,13 +148,28 @@ def convert_para(
145148 }
146149 )
147150
148- if para_type in [BlockType .Text , BlockType .List , BlockType . Index ]:
151+ if para_type in [BlockType .Text , BlockType .Index ]:
149152 return [
150153 TextPart (
151154 content = merge_para_with_text (para_block ),
152155 metadata = metadata ,
153156 )
154157 ]
158+ elif para_type == BlockType .List :
159+ # The VLM backend's output for the List type differs from the pipeline backend's.
160+ # See https://opendatalab.github.io/MinerU/reference/output_files/#intermediate-processing-results-middlejson_1
161+ if para_block .get ("sub_type" ) is None :
162+ # The `sub_type` field is exclusive to the VLM backend.
163+ # Its absence indicates the pipeline backend is in use.
164+ return [
165+ TextPart (
166+ content = merge_para_with_text (para_block ),
167+ metadata = metadata ,
168+ )
169+ ]
170+ else :
171+ # In VLM backend, the List block is a second-level block.
172+ return _convert_list_para (image_dir , para_block , metadata )
155173 elif para_type == BlockType .Title :
156174 title_level = para_block .get ("level" , 1 )
157175 return [
@@ -172,6 +190,9 @@ def convert_para(
172190 return _convert_image_para (image_dir , para_block , metadata )
173191 elif para_type == BlockType .Table :
174192 return _convert_table_para (image_dir , para_block , metadata )
193+ elif para_type == BlockType .Code :
194+ # Code blocks are exclusive to the VLM backend.
195+ return _convert_code_para (image_dir , para_block , metadata )
175196
176197 return []
177198
@@ -291,3 +312,53 @@ def _convert_table_para(image_dir: Path, para_block: dict[str, Any], metadata: d
291312 url = asset_url ,
292313 )
293314 return [asset_bin_part , img_part ]
315+
316+
def _convert_list_para(image_dir: Path, para_block: dict[str, Any], metadata: dict[str, Any]) -> list[Part]:
    """Turn a List block that carries nested sub-blocks into one text part per item.

    ``convert_para`` routes List blocks here when they have a ``sub_type``
    field, i.e. when the List block is a second-level block whose items live
    in ``para_block["blocks"]``.

    Args:
        image_dir: Not used by this converter.
        para_block: The List block; its ``"blocks"`` entries are inspected and
            only those whose ``"type"`` is ``BlockType.Text`` are converted.
        metadata: Attached unchanged to every returned part.

    Returns:
        One ``TextPart`` per text sub-block, in document order; an empty list
        when the block has no text sub-blocks.
    """
    # Merge the text of every item first, then wrap each merged string in a part.
    item_texts = [
        merge_para_with_text(child)
        for child in para_block["blocks"]
        if child["type"] == BlockType.Text
    ]
    return [TextPart(content=text, metadata=metadata) for text in item_texts]
330+
331+
def _convert_code_para(image_dir: Path, para_block: dict[str, Any], metadata: dict[str, Any]) -> list[Part]:
    """Convert a Code block into text parts for its caption and its body.

    The sub-blocks in ``para_block["blocks"]`` are scanned once. Text spans
    found in ``code_caption`` sub-blocks and in ``code_body`` sub-blocks are
    gathered separately; sub-blocks of any other type and spans that are not
    text are ignored.

    Args:
        image_dir: Not used by this converter.
        para_block: The Code block. Must contain a ``"blocks"`` list; each
            caption/body sub-block must contain ``"lines"``, each line
            ``"spans"``, and each span ``"type"`` (plus ``"content"`` for
            text spans).
        metadata: Attached unchanged to every returned part.

    Returns:
        The caption part (when any caption text span exists) followed by the
        body part (when any body text span exists). Empty list when neither
        is present.

    Raises:
        KeyError: If one of the required keys listed above is missing.
    """
    caption_spans: list[str] = []
    body_spans: list[str] = []
    # Route each relevant sub-block type to the list that collects its spans,
    # so the span walk below is written only once.
    collectors = {
        BlockType.CodeBody: body_spans,
        BlockType.CodeCaption: caption_spans,
    }

    for block in para_block["blocks"]:
        collected = collectors.get(block["type"])
        if collected is None:
            # Not a caption or body sub-block: nothing to extract.
            continue
        for line in block["lines"]:
            collected.extend(span["content"] for span in line["spans"] if span["type"] == ContentType.Text)

    result: list[Part] = []
    # Caption goes first, then the code itself.
    # TODO: add a CodePart (the body is emitted as plain text for now)
    for spans in (caption_spans, body_spans):
        if not spans:
            continue
        # Terminate every span with a bare newline. Nothing may follow the
        # newline, otherwise the next code line would start with extra
        # whitespace and its indentation would be corrupted.
        text = "".join(content + "\n" for content in spans)
        result.append(TextPart(content=text, metadata=metadata))

    return result
0 commit comments