Skip to content

Commit 0fc4d31

Browse files
committed
chore: 文档内容无法提取的时候输出错误信息
1 parent 1f88ee2 commit 0fc4d31

File tree

3 files changed

+50
-41
lines changed

3 files changed

+50
-41
lines changed

apps/common/handle/impl/table/csv_parse_table_handle.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,4 +41,4 @@ def get_content(self, file):
41 41
return buffer.decode(detect(buffer)['encoding'])
42 42
except BaseException as e:
43 43
max_kb.error(f'csv split handle error: {e}')
44-
return [{'name': file.name, 'paragraphs': []}]
44+
return f'error: {e}'

apps/common/handle/impl/table/xls_parse_table_handle.py

Lines changed: 20 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -63,21 +63,26 @@ def handle(self, file, get_buffer, save_image):
63 63

64 64
def get_content(self, file):
65 65
# 打开 .xls 文件
66-
workbook = xlrd.open_workbook(file_contents=file.read(), formatting_info=True)
67-
sheets = workbook.sheets()
68-
md_tables = ''
69-
for sheet in sheets:
66+
try:
67+
workbook = xlrd.open_workbook(file_contents=file.read(), formatting_info=True)
68+
sheets = workbook.sheets()
69+
md_tables = ''
70+
for sheet in sheets:
70 71

71-
# 获取表头和内容
72-
headers = sheet.row_values(0)
73-
data = [sheet.row_values(row_idx) for row_idx in range(1, sheet.nrows)]
72+
# 获取表头和内容
73+
headers = sheet.row_values(0)
74+
data = [sheet.row_values(row_idx) for row_idx in range(1, sheet.nrows)]
74 75

75-
# 构建 Markdown 表格
76-
md_table = '| ' + ' | '.join(headers) + ' |\n'
77-
md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
78-
for row in data:
79-
# 将每个单元格中的内容替换换行符为 <br> 以保留原始格式
80-
md_table += '| ' + ' | '.join([str(cell).replace('\n', '<br>') if cell else '' for cell in row]) + ' |\n'
81-
md_tables += md_table + '\n\n'
76+
# 构建 Markdown 表格
77+
md_table = '| ' + ' | '.join(headers) + ' |\n'
78+
md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
79+
for row in data:
80+
# 将每个单元格中的内容替换换行符为 <br> 以保留原始格式
81+
md_table += '| ' + ' | '.join(
82+
[str(cell).replace('\n', '<br>') if cell else '' for cell in row]) + ' |\n'
83+
md_tables += md_table + '\n\n'
82 84

83-
return md_tables
85+
return md_tables
86+
except Exception as e:
87+
max_kb.error(f'excel split handle error: {e}')
88+
return f'error: {e}'

apps/common/handle/impl/table/xlsx_parse_table_handle.py

Lines changed: 29 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -75,28 +75,32 @@ def handle(self, file, get_buffer, save_image):
75 75

76 76

77 77
def get_content(self, file):
78-
# 加载 Excel 文件
79-
workbook = load_workbook(file)
80-
md_tables = ''
81-
# 如果未指定 sheet_name,则使用第一个工作表
82-
for sheetname in workbook.sheetnames:
83-
sheet = workbook[sheetname] if sheetname else workbook.active
84-
85-
# 获取工作表的所有行
86-
rows = list(sheet.iter_rows(values_only=True))
87-
if not rows:
88-
continue
89-
90-
# 提取表头和内容
91-
headers = rows[0]
92-
data = rows[1:]
93-
94-
# 构建 Markdown 表格
95-
md_table = '| ' + ' | '.join(headers) + ' |\n'
96-
md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
97-
for row in data:
98-
md_table += '| ' + ' | '.join(
99-
[str(cell).replace('\n', '<br>') if cell is not None else '' for cell in row]) + ' |\n'
100-
101-
md_tables += md_table + '\n\n'
102-
return md_tables
78+
try:
79+
# 加载 Excel 文件
80+
workbook = load_workbook(file)
81+
md_tables = ''
82+
# 如果未指定 sheet_name,则使用第一个工作表
83+
for sheetname in workbook.sheetnames:
84+
sheet = workbook[sheetname] if sheetname else workbook.active
85+
86+
# 获取工作表的所有行
87+
rows = list(sheet.iter_rows(values_only=True))
88+
if not rows:
89+
continue
90+
91+
# 提取表头和内容
92+
headers = rows[0]
93+
data = rows[1:]
94+
95+
# 构建 Markdown 表格
96+
md_table = '| ' + ' | '.join(headers) + ' |\n'
97+
md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
98+
for row in data:
99+
md_table += '| ' + ' | '.join(
100+
[str(cell).replace('\n', '<br>') if cell is not None else '' for cell in row]) + ' |\n'
101+
102+
md_tables += md_table + '\n\n'
103+
return md_tables
104+
except Exception as e:
105+
max_kb.error(f'excel split handle error: {e}')
106+
return f'error: {e}'

0 commit comments

Comments (0)