Skip to content

Commit 5171da9

Browse files
committed
fix: Part of the docx document is parsed incorrectly
1 parent 00591a5 commit 5171da9

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

apps/common/handle/impl/doc_split_handle.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -113,8 +113,10 @@ class DocSplitHandle(BaseSplitHandle):
113113
def paragraph_to_md(paragraph: Paragraph, doc: Document, images_list, get_image_id):
114114
try:
115115
psn = paragraph.style.name
116-
if psn.startswith('Heading'):
117-
title = "".join(["#" for i in range(int(psn.replace("Heading ", '')))]) + " " + paragraph.text
116+
if psn.startswith('Heading') or psn.startswith('TOC 标题') or psn.startswith('标题'):
117+
title = "".join(["#" for i in range(
118+
int(psn.replace("Heading ", '').replace('TOC 标题', '').replace('标题',
119+
'')))]) + " " + paragraph.text
118120
images = reduce(lambda x, y: [*x, *y],
119121
[get_paragraph_element_images(e, doc, images_list, get_image_id) for e in
120122
paragraph._element],
@@ -202,4 +204,4 @@ def get_content(self, file, save_image):
202204
return content
203205
except BaseException as e:
204206
traceback.print_exception(e)
205-
return f'{e}'
207+
return f'{e}'

0 commit comments

Comments
 (0)