Skip to content

Commit b3dbe00

Browse files
committed
fix: 修复上传xlsx里的图片没在文档提取中显示的问题 (fix: images embedded in an uploaded xlsx file were not shown in the extracted document)
1 parent cf4b4af commit b3dbe00

File tree

1 file changed

+14 -7 lines changed

1 file changed

+14 -7 lines changed

apps/common/handle/impl/table/xlsx_parse_table_handle.py

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -78,28 +78,35 @@ def get_content(self, file, save_image):
7878
try:
7979
# 加载 Excel 文件
8080
workbook = load_workbook(file)
81+
try:
82+
image_dict: dict = xlsx_embed_cells_images(file)
83+
if len(image_dict) > 0:
84+
save_image(image_dict.values())
85+
except Exception as e:
86+
print(f'{e}')
87+
image_dict = {}
8188
md_tables = ''
8289
# 如果未指定 sheet_name,则使用第一个工作表
8390
for sheetname in workbook.sheetnames:
8491
sheet = workbook[sheetname] if sheetname else workbook.active
85-
86-
# 获取工作表的所有行
87-
rows = list(sheet.iter_rows(values_only=True))
88-
if not rows:
89-
continue
92+
rows = self.fill_merged_cells(sheet, image_dict)
9093

9194
# 提取表头和内容
92-
headers = rows[0]
95+
96+
headers = [f"{key}" for key, value in rows[0].items()]
9397
data = rows[1:]
9498

9599
# 构建 Markdown 表格
96100
md_table = '| ' + ' | '.join(headers) + ' |\n'
97101
md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
98102
for row in data:
103+
r = [f'{value}' for key, value in row.items()]
99104
md_table += '| ' + ' | '.join(
100-
[str(cell).replace('\n', '<br>') if cell is not None else '' for cell in row]) + ' |\n'
105+
[str(cell).replace('\n', '<br>') if cell is not None else '' for cell in r]) + ' |\n'
101106

102107
md_tables += md_table + '\n\n'
108+
109+
md_tables = md_tables.replace('/api/image/', '/api/file/')
103110
return md_tables
104111
except Exception as e:
105112
max_kb.error(f'excel split handle error: {e}')

0 commit comments

Comments (0)