Skip to content

Commit 9fc1c98

Browse files
committed
fix: prevent bigint data from displaying in scientific notation in chat
1 parent e20587d commit 9fc1c98

File tree

2 files changed

+89
-17
lines changed

2 files changed

+89
-17
lines changed

backend/apps/chat/api/chat.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,13 @@ async def export_excel(session: SessionDep, chat_record_id: int, trans: Trans):
245245

246246
def inner():
247247

248-
data, _fields_list, col_formats = LLMService.format_pd_data(fields, _data + _predict_data)
248+
data_list = LLMService.convert_large_numbers_in_object_array(_data + _predict_data)
249249

250-
df = pd.DataFrame(data, columns=_fields_list)
250+
md_data, _fields_list = LLMService.convert_object_array_for_pandas(fields, data_list)
251+
252+
# data, _fields_list, col_formats = LLMService.format_pd_data(fields, _data + _predict_data)
253+
254+
df = pd.DataFrame(md_data, columns=_fields_list)
251255

252256
buffer = io.BytesIO()
253257

@@ -256,14 +260,14 @@ def inner():
256260
df.to_excel(writer, sheet_name='Sheet1', index=False)
257261

258262
# 获取 xlsxwriter 的工作簿和工作表对象
259-
workbook = writer.book
260-
worksheet = writer.sheets['Sheet1']
261-
262-
for col_idx, fmt_type in col_formats.items():
263-
if fmt_type == 'text':
264-
worksheet.set_column(col_idx, col_idx, None, workbook.add_format({'num_format': '@'}))
265-
elif fmt_type == 'number':
266-
worksheet.set_column(col_idx, col_idx, None, workbook.add_format({'num_format': '0'}))
263+
# workbook = writer.book
264+
# worksheet = writer.sheets['Sheet1']
265+
#
266+
# for col_idx, fmt_type in col_formats.items():
267+
# if fmt_type == 'text':
268+
# worksheet.set_column(col_idx, col_idx, None, workbook.add_format({'num_format': '@'}))
269+
# elif fmt_type == 'number':
270+
# worksheet.set_column(col_idx, col_idx, None, workbook.add_format({'num_format': '0'}))
267271

268272
buffer.seek(0)
269273
return io.BytesIO(buffer.getvalue())

backend/apps/chat/task/llm.py

Lines changed: 75 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,10 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
10381038
return
10391039

10401040
result = self.execute_sql(sql=real_execute_sql)
1041+
1042+
_data = self.convert_large_numbers_in_object_array(result.get('data'))
1043+
result["data"] = _data
1044+
10411045
self.save_sql_data(session=_session, data_obj=result)
10421046
if in_chat:
10431047
yield 'data:' + orjson.dumps({'content': 'execute-success', 'type': 'sql-data'}).decode() + '\n\n'
@@ -1053,12 +1057,14 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
10531057
for field in result.get('fields'):
10541058
_column_list.append(AxisObj(name=field, value=field))
10551059

1056-
data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))
1060+
md_data, _fields_list = self.convert_object_array_for_pandas(_column_list, result.get('data'))
1061+
1062+
# data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))
10571063

1058-
if not data or not _fields_list:
1064+
if not _data or not _fields_list:
10591065
yield 'The SQL execution result is empty.\n\n'
10601066
else:
1061-
df = pd.DataFrame(data, columns=_fields_list)
1067+
df = pd.DataFrame(_data, columns=_fields_list)
10621068
df_safe = self.safe_convert_to_string(df)
10631069
markdown_table = df_safe.to_markdown(index=False)
10641070
yield markdown_table + '\n\n'
@@ -1091,7 +1097,6 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
10911097
{'content': orjson.dumps(chart).decode(), 'type': 'chart'}).decode() + '\n\n'
10921098
else:
10931099
if stream:
1094-
data = []
10951100
_fields = {}
10961101
if chart.get('columns'):
10971102
for _column in chart.get('columns'):
@@ -1110,12 +1115,14 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
11101115
_column_list.append(
11111116
AxisObj(name=field if not _fields.get(field) else _fields.get(field), value=field))
11121117

1113-
data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))
1118+
md_data, _fields_list = self.convert_object_array_for_pandas(_column_list, result.get('data'))
11141119

1115-
if not data or not _fields_list:
1120+
# data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))
1121+
1122+
if not md_data or not _fields_list:
11161123
yield 'The SQL execution result is empty.\n\n'
11171124
else:
1118-
df = pd.DataFrame(data, columns=_fields_list)
1125+
df = pd.DataFrame(md_data, columns=_fields_list)
11191126
df_safe = self.safe_convert_to_string(df)
11201127
markdown_table = df_safe.to_markdown(index=False)
11211128
yield markdown_table + '\n\n'
@@ -1183,6 +1190,67 @@ def safe_convert_to_string(df):
11831190

11841191
return df_copy
11851192

1193+
@staticmethod
def convert_large_numbers_in_object_array(obj_array, int_threshold=1e15, float_threshold=1e10):
    """Stringify numbers that would otherwise be shown in scientific notation.

    Walks every dict in ``obj_array`` (including nested dicts and lists) and
    replaces numeric values whose magnitude makes them prone to exponent
    rendering with a plain decimal string. All other values are returned
    unchanged, and the input objects are never mutated.

    Args:
        obj_array: Iterable of row objects (normally dicts). ``None`` or an
            empty sequence yields an empty list.
        int_threshold: Integers with ``abs(value) >= int_threshold`` are
            converted with ``str``.
        float_threshold: Floats with ``abs(value) >= float_threshold`` are
            converted to a fixed-point string.

    Returns:
        A new list with the same length/order as ``obj_array``. Top-level
        items that are not dicts are passed through untouched.
    """
    # Function-scope imports keep this block self-contained.
    import math
    from decimal import Decimal

    # Non-zero floats below this magnitude are also converted to strings.
    small_float_threshold = 1e-6

    def format_float_without_scientific(value):
        """Return the shortest exact fixed-point text for ``value``."""
        if value == 0:
            return "0"
        if math.isinf(value) or math.isnan(value):
            # Decimal would spell these "Infinity"/"NaN"; keep float spelling.
            return str(value)
        # repr() is the shortest string that round-trips the float, so going
        # through Decimal avoids both binary noise digits
        # (10000000000.1 -> "10000000000.100000381469727") and the loss of
        # values below 1e-15 that a fixed ".15f" format causes.
        text = format(Decimal(repr(value)), "f")
        if "." in text:
            text = text.rstrip("0").rstrip(".")
        return text

    def convert_scalar(value):
        """Convert a single leaf value when it is an out-of-range number."""
        if isinstance(value, bool):
            # bool is a subclass of int; never stringify flags.
            return value
        if isinstance(value, int):
            return str(value) if abs(value) >= int_threshold else value
        if isinstance(value, float):
            magnitude = abs(value)
            # Zero is excluded from the "tiny" branch: it never renders in
            # scientific notation and should stay numeric.
            if magnitude >= float_threshold or 0 < magnitude < small_float_threshold:
                return format_float_without_scientific(value)
        return value

    def convert_value(value):
        """Recursively convert dicts, lists and scalar leaves."""
        if isinstance(value, dict):
            return {key: convert_value(item) for key, item in value.items()}
        if isinstance(value, list):
            return [convert_value(item) for item in value]
        return convert_scalar(value)

    if not obj_array:
        return []

    # Only dict rows are processed at the top level; anything else is
    # returned as-is.
    return [convert_value(row) if isinstance(row, dict) else row for row in obj_array]
1238+
1239+
@staticmethod
def convert_object_array_for_pandas(column_list: list, data_list: list):
    """Turn a list of row dicts into row lists plus header names for pandas.

    Args:
        column_list: Column descriptors; each must expose ``name`` (the header
            to display) and ``value`` (the key to look up in every row).
            ``None`` is treated as an empty list.
        data_list: Row dicts. ``None`` is treated as an empty list so an empty
            query result does not raise.

    Returns:
        A ``(md_data, _fields_list)`` tuple: ``md_data`` is a list of rows,
        each a list of cell values ordered like ``column_list`` (``None`` for
        keys missing from a row); ``_fields_list`` is the list of header names.
    """
    columns = column_list or []
    _fields_list = [field.name for field in columns]
    md_data = [
        [row.get(field.value) for field in columns]
        for row in (data_list or [])
    ]
    return md_data, _fields_list
1253+
11861254
@staticmethod
11871255
def format_pd_data(column_list: list, data_list: list, col_formats: dict = None):
11881256
# 预处理数据并记录每列的格式类型

0 commit comments

Comments
 (0)