4545from common .core .db import engine
4646from common .core .deps import CurrentAssistant , CurrentUser
4747from common .error import SingleMessageError , SQLBotDBError , ParseSQLResultError , SQLBotDBConnectionError
48+ from common .utils .data_format import DataFormat
4849from common .utils .utils import SQLBotLogUtil , extract_nested_json , prepare_for_orjson
4950
5051warnings .filterwarnings ("ignore" )
@@ -1039,7 +1040,7 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
10391040
10401041 result = self .execute_sql (sql = real_execute_sql )
10411042
1042- _data = self .convert_large_numbers_in_object_array (result .get ('data' ))
1043+ _data = DataFormat .convert_large_numbers_in_object_array (result .get ('data' ))
10431044 result ["data" ] = _data
10441045
10451046 self .save_sql_data (session = _session , data_obj = result )
@@ -1057,15 +1058,15 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
10571058 for field in result .get ('fields' ):
10581059 _column_list .append (AxisObj (name = field , value = field ))
10591060
1060- md_data , _fields_list = self .convert_object_array_for_pandas (_column_list , result .get ('data' ))
1061+ md_data , _fields_list = DataFormat .convert_object_array_for_pandas (_column_list , result .get ('data' ))
10611062
10621063 # data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))
10631064
10641065 if not _data or not _fields_list :
10651066 yield 'The SQL execution result is empty.\n \n '
10661067 else :
10671068 df = pd .DataFrame (_data , columns = _fields_list )
1068- df_safe = self .safe_convert_to_string (df )
1069+ df_safe = DataFormat .safe_convert_to_string (df )
10691070 markdown_table = df_safe .to_markdown (index = False )
10701071 yield markdown_table + '\n \n '
10711072 else :
@@ -1115,15 +1116,15 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
11151116 _column_list .append (
11161117 AxisObj (name = field if not _fields .get (field ) else _fields .get (field ), value = field ))
11171118
1118- md_data , _fields_list = self .convert_object_array_for_pandas (_column_list , result .get ('data' ))
1119+ md_data , _fields_list = DataFormat .convert_object_array_for_pandas (_column_list , result .get ('data' ))
11191120
11201121 # data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))
11211122
11221123 if not md_data or not _fields_list :
11231124 yield 'The SQL execution result is empty.\n \n '
11241125 else :
11251126 df = pd .DataFrame (md_data , columns = _fields_list )
1126- df_safe = self .safe_convert_to_string (df )
1127+ df_safe = DataFormat .safe_convert_to_string (df )
11271128 markdown_table = df_safe .to_markdown (index = False )
11281129 yield markdown_table + '\n \n '
11291130
@@ -1176,120 +1177,7 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
11761177 self .finish (_session )
11771178 session_maker .remove ()
11781179
1179- @staticmethod
def safe_convert_to_string(df):
    """Return a copy of ``df`` in which every cell is rendered as text.

    Missing scalar cells (anything ``pd.isna`` reports as missing, such as
    ``None`` or ``NaN``) become the empty string. Every other cell is
    converted with ``str()`` and given a prefix that starts with a
    zero-width space (U+200B).
    NOTE(review): the prefix presumably stops renderers from re-interpreting
    numeric-looking text; confirm against the consumer of the table.

    Args:
        df: The pandas DataFrame to convert. It is not modified.

    Returns:
        A new DataFrame with the same columns whose cells are all strings.
    """

    def format_value(x):
        # pd.isna() returns an element-wise array for list-like cells, and an
        # array cannot be used as an `if` condition. Only scalars can be
        # "missing", so list-like cells go straight to str().
        if pd.api.types.is_scalar(x) and pd.isna(x):
            return ""
        return "\u200b " + str(x)

    df_copy = df.copy()
    for col in df_copy.columns:
        df_copy[col] = df_copy[col].apply(format_value)
    return df_copy
1193-
1194- @staticmethod
def convert_large_numbers_in_object_array(obj_array, int_threshold=1e15, float_threshold=1e10):
    """Convert large or tiny numbers inside an array of dicts into strings.

    Each dict in ``obj_array`` is copied. In the copy:
      * an ``int`` whose magnitude is >= ``int_threshold`` becomes ``str(value)``;
      * a ``float`` whose magnitude is >= ``float_threshold`` or < 1e-6 becomes
        plain decimal text without scientific notation (``0.0`` is covered by
        the "< 1e-6" rule and becomes ``"0"``);
      * nested dicts are processed recursively, and dicts found inside list
        values are processed too (other list items are kept as they are);
      * every other value is kept unchanged.

    Items of ``obj_array`` that are not dicts are returned unchanged. The
    input objects are never mutated.

    Args:
        obj_array: Iterable of row objects (normally dicts), or ``None``.
        int_threshold: Magnitude from which integers are stringified.
        float_threshold: Magnitude from which floats are stringified.

    Returns:
        A new list with the processed items; an empty list for ``None``.
    """
    import math
    from decimal import Decimal

    def format_float_without_scientific(value):
        """Format a float as plain decimal text, avoiding scientific notation."""
        if value == 0:
            return "0"
        if not math.isfinite(value):
            # Infinity has no decimal expansion; keep Python's own spelling.
            return str(value)
        # repr() yields the shortest text that round-trips the float, and
        # Decimal expands it exactly. A fixed ".15f" would instead collapse
        # tiny values (e.g. 1e-20) to "0" and append binary-expansion noise
        # to large ones.
        text = format(Decimal(repr(float(value))), "f")
        if "." in text:
            text = text.rstrip("0").rstrip(".")
        return text

    def process_object(obj):
        """Return a converted copy of a dict; pass any other object through."""
        if not isinstance(obj, dict):
            return obj

        processed_obj = {}
        for key, value in obj.items():
            if isinstance(value, int):
                # Only large integers are converted.
                processed_obj[key] = str(value) if abs(value) >= int_threshold else value
            elif isinstance(value, float):
                magnitude = abs(value)
                if magnitude >= float_threshold or magnitude < 1e-6:
                    processed_obj[key] = format_float_without_scientific(value)
                else:
                    processed_obj[key] = value
            elif isinstance(value, dict):
                # Nested object.
                processed_obj[key] = process_object(value)
            elif isinstance(value, list):
                # Array inside the object: only its dict items are converted.
                processed_obj[key] = [process_object(item) for item in value]
            else:
                processed_obj[key] = value
        return processed_obj

    if obj_array is None:
        # A result without data is treated as an empty result set.
        return []
    return [process_object(obj) for obj in obj_array]
12391180
1240- @staticmethod
def convert_object_array_for_pandas(column_list: list, data_list: list):
    """Turn a list of row dicts into row lists plus matching column headers.

    Args:
        column_list: Column descriptors. Each one must expose ``name`` (the
            header to display) and ``value`` (the key to look up in a row).
        data_list: Row objects supporting ``.get(key)``. ``None`` is treated
            as "no rows".

    Returns:
        A ``(md_data, _fields_list)`` tuple. ``md_data`` holds one list per
        row with the cells in ``column_list`` order (``None`` where a row
        lacks the key); ``_fields_list`` holds the column headers.
    """
    _fields_list = [field.name for field in column_list]

    # Callers pass `result.get('data')`, which may be None; treat that as an
    # empty result set rather than failing on iteration.
    md_data = [
        [inner_data.get(field.value) for field in column_list]
        for inner_data in (data_list or [])
    ]
    return md_data, _fields_list
1254-
1255- @staticmethod
1256- def format_pd_data (column_list : list , data_list : list , col_formats : dict = None ):
1257- # Pre-process the rows and record a format type for each column
1258- # Format types: 'text', 'number', 'default'
1259- _fields_list = []
1260-
1261- if col_formats is None :
1262- col_formats = {}
1263- for field_idx , field in enumerate (column_list ):
1264- _fields_list .append (field .name )
1265- col_formats [field_idx ] = 'default' # default: no special handling
1266-
1267- data = []
1268-
1269- for _data in data_list :
1270- _row = []
1271- for field_idx , field in enumerate (column_list ):
1272- value = _data .get (field .value )
1273- if value is not None :
1274- # Check whether the value is a number that needs special handling
1275- if isinstance (value , (int , float )):
1276- # Integer with more than 15 digits -> convert to string and mark the column as text
1277- if isinstance (value , int ) and len (str (abs (value ))) > 15 :
1278- value = str (value )
1279- col_formats [field_idx ] = 'text'
1280- # Float with more than 15 significant digits (measured as the length of its '.16f' text) -> convert to string and mark the column as text
1281- elif isinstance (value , float ):
1282- decimal_str = format (value , '.16f' ).rstrip ('0' ).rstrip ('.' )
1283- if len (decimal_str ) > 15 :
1284- value = str (value )
1285- col_formats [field_idx ] = 'text'
1286- # Other numeric columns are marked as number format (to avoid scientific notation)
1287- elif col_formats [field_idx ] != 'text' :
1288- col_formats [field_idx ] = 'number'
1289- _row .append (value )
1290- data .append (_row )
1291-
1292- return data , _fields_list , col_formats
12931181
12941182 def run_recommend_questions_task_async (self ):
12951183 self .future = executor .submit (self .run_recommend_questions_task_cache )
0 commit comments