@@ -167,14 +167,17 @@ def support(self, file, get_buffer):
167167 def get_content (self , file , save_image ):
168168 """
169169 从 zip 中提取并返回拼接的 md 文本,同时收集并保存内嵌图片(通过 save_image 回调)。
170+ 使用 posixpath 来正确处理 zip 内部的路径拼接与规范化。
170171 """
171172 buffer = file .read () if hasattr (file , 'read' ) else None
172173 bytes_io = io .BytesIO (buffer ) if buffer is not None else io .BytesIO (file )
173- md_parts = []
174+ md_items = [] # 存储 (md_text, source_file_path)
174175 image_mode_list = []
175176
177+ import posixpath
178+
176179 def is_image_name (name : str ):
177- ext = os . path .splitext (name .lower ())[1 ]
180+ ext = posixpath .splitext (name .lower ())[1 ]
178181 return ext in ('.png' , '.jpg' , '.jpeg' , '.gif' , '.bmp' , '.webp' , '.svg' )
179182
180183 with zipfile .ZipFile (bytes_io , 'r' ) as zip_ref :
@@ -197,7 +200,6 @@ def is_image_name(name: str):
197200 meta = {'debug' : False , 'content' : raw }
198201 )
199202 image_mode_list .append (fmodel )
200- # 在 md 中不直接插入二进制,保存后上层可替换引用
201203 continue
202204
203205 # 为 split_handle 提供可重复读取的 file-like 对象
@@ -210,22 +212,8 @@ def is_image_name(name: str):
210212 # 准备一个简单的 get_buffer 回调,返回当前 raw
211213 get_buffer = lambda f , _raw = raw : _raw
212214 if split_handle .support (inner_file , get_buffer ):
213- # 回到文件头
214215 inner_file .seek (0 )
215216 md_text = split_handle .get_content (inner_file , save_image )
216- image_list = parse_md_image (md_text )
217- for image in image_list :
218- search = re .search ("\(.*\)" , image )
219- if search :
220- source_image_path = search .group ().replace ('(' , '' ).replace (')' , '' )
221- source_image_path = source_image_path .strip ().split (" " )[0 ]
222- image_path = urljoin (
223- real_name , '.' + source_image_path if source_image_path .startswith (
224- '/' ) else source_image_path
225- )
226- for img_model in image_mode_list :
227- if img_model .file_name == os .path .basename (image_path ):
228- md_text = md_text .replace (source_image_path , f'./oss/file/{ img_model .id } ' )
229217 break
230218
231219 # 如果没有任何 split_handle 处理,按文本解码作为后备
@@ -237,10 +225,42 @@ def is_image_name(name: str):
237225 md_text = raw .decode ('utf-8' , errors = 'ignore' )
238226
239227 if isinstance (md_text , str ) and md_text .strip ():
240- md_parts .append (md_text )
228+ # 保存 md 文本与其所在的文件路径,后面统一做图片路径替换
229+ md_items .append ((md_text , real_name ))
241230
242- # 将收集到的图片通过回调保存
231+ # 将收集到的图片通过回调保存(一次性)
243232 if image_mode_list :
244233 save_image (image_mode_list )
245234
246- return '\n \n ' .join (md_parts )
235+ # 后处理:在每个 md 片段中将相对/绝对引用替换为已保存图片的 oss 路径
236+ content_parts = []
237+ for md_text , base_name in md_items :
238+ image_refs = parse_md_image (md_text )
239+ for image in image_refs :
240+ search = re .search (r"\(.*\)" , image )
241+ if not search :
242+ continue
243+ source_image_path = search .group ().strip ("()" ).split (" " )[0 ]
244+
245+ # 规范化 zip 内部路径:若以 '/' 开头,视为相对于 zip 根,否则相对于 base_name 的目录
246+ if source_image_path .startswith ('/' ):
247+ joined = posixpath .normpath (source_image_path .lstrip ('/' ))
248+ else :
249+ base_dir = posixpath .dirname (base_name )
250+ joined = posixpath .normpath (posixpath .join (base_dir , source_image_path ))
251+
252+ # 匹配已收集图片:以文件名做匹配(zip 中的文件名通常是不含反斜杠的 POSIX 风格)
253+ matched = None
254+ for img_model in image_mode_list :
255+ if img_model .file_name == posixpath .basename (joined ):
256+ matched = img_model
257+ break
258+
259+ if matched :
260+ md_text = md_text .replace (source_image_path , f'./oss/file/{ matched .id } ' )
261+
262+ content_parts .append (md_text )
263+
264+ return '\n \n ' .join (content_parts )
265+
266+
0 commit comments