Skip to content

Commit a3c0cc1

Browse files
committed
refactor: update file ID assignment in document extraction to use provided metadata
1 parent c9eb84f commit a3c0cc1

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

apps/common/handle/impl/text/doc_split_handle.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,11 @@ def get_image_id(image_id):
114114

115115
title_font_list = [
116116
[36, 100],
117-
[30, 36]
117+
[26, 36],
118+
[24, 26],
119+
[22, 24],
120+
[18, 22],
121+
[16, 18]
118122
]
119123

120124

@@ -125,12 +129,12 @@ def get_title_level(paragraph: Paragraph):
125129
if psn.startswith('Heading') or psn.startswith('TOC 标题') or psn.startswith('标题'):
126130
return int(psn.replace("Heading ", '').replace('TOC 标题', '').replace('标题',
127131
''))
128-
if len(paragraph.runs) == 1:
132+
if len(paragraph.runs) >= 1:
129133
font_size = paragraph.runs[0].font.size
130134
pt = font_size.pt
131-
if pt >= 30:
135+
if pt >= 16:
132136
for _value, index in zip(title_font_list, range(len(title_font_list))):
133-
if pt >= _value[0] and pt < _value[1]:
137+
if pt >= _value[0] and pt < _value[1] and any([run.font.bold for run in paragraph.runs]):
134138
return index + 1
135139
except Exception as e:
136140
pass

0 commit comments

Comments
 (0)