1414from typing import List
1515
1616from docx import Document , ImagePart
17+ from docx .oxml import ns
1718from docx .table import Table
1819from docx .text .paragraph import Paragraph
19- from docx .oxml import ns
2020
2121from common .handle .base_split_handle import BaseSplitHandle
2222from common .util .split_model import SplitModel
3333combine_nsmap = {** ns .nsmap , ** old_docx_nsmap }
3434
3535
36- def image_to_mode (image , doc : Document , images_list , get_image_id , is_new_docx = True ):
37- if is_new_docx :
38- image_ids = image .xpath ('.//a:blip/@r:embed' )
39- else :
40- image_ids = image .xpath ('.//v:imagedata/@r:id' , namespaces = combine_nsmap )
36+ def image_to_mode (image , doc : Document , images_list , get_image_id ):
37+ image_ids = image ['get_image_id_handle' ](image .get ('image' ))
4138 for img_id in image_ids : # 获取图片id
4239 part = doc .part .related_parts [img_id ] # 根据图片id获取对应的图片
4340 if isinstance (part , ImagePart ):
@@ -49,14 +46,15 @@ def image_to_mode(image, doc: Document, images_list, get_image_id, is_new_docx=T
4946
5047
def get_paragraph_element_images(paragraph_element, doc: Document, images_list, get_image_id):
    """Collect the picture nodes found beneath a paragraph element.

    Two XPath queries are evaluated against ``paragraph_element`` in a fixed
    order: ``.//pic:pic`` first, then ``.//w:pict``. Every node that matches
    is wrapped in a dict together with the callable that knows how to pull
    the image ids out of that kind of node.

    :param paragraph_element: object exposing an ``xpath(expression)`` method
        (presumably the lxml element behind a docx paragraph; confirm
        against the caller).
    :param doc: not used by this function; kept so the signature stays
        unchanged for existing callers.
    :param images_list: not used by this function; kept for the same reason.
    :param get_image_id: not used by this function; kept for the same reason.
    :return: list of dicts, each with the keys ``'image'`` (the matched node)
        and ``'get_image_id_handle'`` (a one-argument callable that runs the
        id-extracting XPath on such a node). Empty when nothing matched.
    """
    # Each entry pairs the XPath that locates picture nodes with the callable
    # that extracts the ids from one of those nodes.
    images_xpath_list = [
        (".//pic:pic", lambda img: img.xpath('.//a:blip/@r:embed')),
        (".//w:pict", lambda img: img.xpath('.//v:imagedata/@r:id', namespaces=combine_nsmap)),
    ]
    images = []
    for images_xpath, get_image_id_handle in images_xpath_list:
        try:
            _images = paragraph_element.xpath(images_xpath)
        except Exception:
            # Best effort: a query that cannot be evaluated on this element
            # must not prevent the remaining queries from running.
            continue
        if not _images:
            continue
        # The key must be exactly 'image': consumers look the node up with
        # .get('image'), so any other spelling would silently yield None.
        images.extend(
            {'image': image, 'get_image_id_handle': get_image_id_handle}
            for image in _images
        )
    return images
0 commit comments