Skip to content

Commit 8c802c3

Browse files
committed
feat: add Markdown parsing support for QA handling
1 parent 582fb99 commit 8c802c3

File tree

2 files changed

+34
-7
lines changed

2 files changed

+34
-7
lines changed

apps/common/handle/impl/table/csv_parse_table_handle.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
# coding=utf-8
2-
import logging
2+
import csv
3+
import io
34
import traceback
45

56
from charset_normalizer import detect
6-
7+
from common.handle.base_parse_qa_handle import get_title_row_index_dict, get_row_value
78
from common.handle.base_parse_table_handle import BaseParseTableHandle
89
from common.utils.logger import maxkb_logger
910

@@ -38,7 +39,33 @@ def handle(self, file, get_buffer, save_image):
3839
def get_content(self, file, save_image):
    """Render an uploaded CSV file as a Markdown table.

    The first non-empty CSV row becomes the table header. Every following
    non-empty row becomes a data row, padded with empty cells or truncated
    so that it has exactly as many columns as the header.

    Args:
        file: Binary file-like object; ``read()`` must return ``bytes``.
        save_image: Not used by this method.

    Returns:
        The Markdown table as a single string, or ``""`` when the file
        contains no rows or cannot be decoded/parsed (the failure is logged).
    """

    def to_md_cell(cell):
        # A raw "|" would split the cell into two columns and a raw line
        # break would terminate the Markdown row, so neutralise both.
        cell = cell.replace('|', '\\|')
        cell = cell.replace('\r\n', '\n').replace('\r', '\n')
        return cell.replace('\n', '<br>')

    buffer = file.read()
    try:
        # detect() reports None when it cannot guess; fall back to UTF-8
        # instead of silently decoding with the platform locale.
        encoding = detect(buffer)['encoding'] or 'utf-8'

        # newline='' is what the csv module requires so that line breaks
        # inside quoted fields reach the parser untouched.
        with io.TextIOWrapper(io.BytesIO(buffer), encoding=encoding, newline='') as text:
            # Drop blank lines up front so a leading blank line cannot
            # become an empty (zero-column) header.
            rows = [row for row in csv.reader(text) if row]

        if not rows:
            return ""

        # Header row followed by the Markdown separator row.
        header = [to_md_cell(cell) for cell in rows[0]]
        width = len(header)
        md_lines = [
            '| ' + ' | '.join(header) + ' |',
            '| ' + ' | '.join(['---'] * width) + ' |',
        ]

        # Data rows, forced to the header width.
        for row in rows[1:]:
            cells = [to_md_cell(cell) for cell in row]
            cells += [''] * (width - len(cells))
            md_lines.append('| ' + ' | '.join(cells[:width]) + ' |')

        return '\n'.join(md_lines)

    except Exception as e:
        # Not every stream carries a name; never let the error path itself raise.
        file_name = getattr(file, 'name', '<unknown>')
        maxkb_logger.error(f"Error processing CSV file {file_name}: {e}, {traceback.format_exc()}")
        return ""

apps/common/handle/impl/table/xls_parse_table_handle.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,5 +89,5 @@ def get_content(self, file, save_image):
8989

9090
return md_tables
9191
except Exception as e:
92-
max_kb.error(f'excel split handle error: {e}')
92+
maxkb_logger.error(f'excel split handle error: {e}')
9393
return f'error: {e}'

0 commit comments

Comments
 (0)