Skip to content

Commit 78cd949

Browse files
committed
fix: 修复上传xlsx里的图片没在文档提取中显示的问题
1 parent cf4b4af commit 78cd949

File tree

1 file changed

+15
-9
lines changed

1 file changed

+15
-9
lines changed

apps/common/handle/impl/table/xlsx_parse_table_handle.py

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -78,28 +78,34 @@ def get_content(self, file, save_image):
7878
try:
7979
# 加载 Excel 文件
8080
workbook = load_workbook(file)
81+
try:
82+
image_dict: dict = xlsx_embed_cells_images(file)
83+
if len(image_dict) > 0:
84+
save_image(image_dict.values())
85+
except Exception as e:
86+
print(f'{e}')
87+
image_dict = {}
8188
md_tables = ''
8289
# 如果未指定 sheet_name,则使用第一个工作表
8390
for sheetname in workbook.sheetnames:
8491
sheet = workbook[sheetname] if sheetname else workbook.active
85-
86-
# 获取工作表的所有行
87-
rows = list(sheet.iter_rows(values_only=True))
88-
if not rows:
89-
continue
92+
rows = self.fill_merged_cells(sheet, image_dict)
9093

9194
# 提取表头和内容
92-
headers = rows[0]
93-
data = rows[1:]
95+
96+
headers = [f"{key}" for key, value in rows[0].items()]
9497

9598
# 构建 Markdown 表格
9699
md_table = '| ' + ' | '.join(headers) + ' |\n'
97100
md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
98-
for row in data:
101+
for row in rows:
102+
r = [f'{value}' for key, value in row.items()]
99103
md_table += '| ' + ' | '.join(
100-
[str(cell).replace('\n', '<br>') if cell is not None else '' for cell in row]) + ' |\n'
104+
[str(cell).replace('\n', '<br>') if cell is not None else '' for cell in r]) + ' |\n'
101105

102106
md_tables += md_table + '\n\n'
107+
108+
md_tables = md_tables.replace('/api/image/', '/api/file/')
103109
return md_tables
104110
except Exception as e:
105111
max_kb.error(f'excel split handle error: {e}')

0 commit comments

Comments
 (0)