fix: 修复对话上下文丢失 + AI 自主生成图表

MKY508 · claude · MKY508 · commit 73013e0d161d · 2025-12-12T18:12:00.000+08:00
P0-1: 修复对话上下文丢失 - ExecutionService 新增 _get_conversation_history() 方法 - GptmeEngine.execute() 接收 history 参数 - 历史消息正确注入到 LiteLLM 消息列表 P0-2: AI 自主生成图表 - 更新系统提示，指导 AI 输出 ```chart 配置块 - 新增 _extract_chart_config() 解析 AI 图表配置 - 新增 _build_chart_from_config() 构建图表数据 - 保留自动生成逻辑作为后备方案 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/apps/api/app/services/execution.py b/apps/api/app/services/execution.py
@@ -13,7 +13,7 @@
 
 from app.core import encryptor
 from app.core.config import settings
-from app.db.tables import Connection, Model, SemanticTerm, User
+from app.db.tables import Connection, Message, Model, SemanticTerm, User
 from app.models import SemanticContext, SemanticTermResponse, SSEEvent
 
 logger = structlog.get_logger()
@@ -152,6 +152,35 @@ async def _get_semantic_context(self) -> SemanticContext:
             terms=[SemanticTermResponse.model_validate(t) for t in terms]
         )
 
+    async def _get_conversation_history(
+        self, conversation_id: UUID, limit: int = 10
+    ) -> list[dict[str, str]]:
+        """Load the most recent messages of a conversation, oldest first.
+
+        Args:
+            conversation_id: ID of the conversation to read.
+            limit: Maximum number of rows fetched (the newest N messages).
+
+        Returns:
+            Dicts with "role" ("user"/"assistant") and non-empty "content".
+        """
+        newest_first = (
+            select(Message)
+            .where(Message.conversation_id == conversation_id)
+            .order_by(Message.created_at.desc())
+            .limit(limit)
+        )
+        rows = (await self.db.execute(newest_first)).scalars().all()
+
+        # The query returns newest first; flip it so the oldest message leads.
+        history = [
+            {"role": row.role, "content": row.content}
+            for row in reversed(rows)
+            if row.role in ("user", "assistant") and row.content
+        ]
+        logger.info(f"Loaded {len(history)} history messages for conversation {conversation_id}")
+        return history
+
     async def execute_stream(
         self,
         query: str,
@@ -178,6 +207,10 @@ async def execute_stream(
             semantic_context = await self._get_semantic_context()
             logger.info(f"Semantic terms count: {len(semantic_context.terms)}")
 
+            # 加载对话历史（不包括当前查询，因为当前查询还未保存）
+            logger.info("Getting conversation history...")
+            history = await self._get_conversation_history(conversation_id, limit=10)
+
             system_prompt = self._build_system_prompt(db_config, semantic_context)
 
             engine = GptmeEngine(
@@ -191,6 +224,7 @@ async def execute_stream(
                 query=query,
                 system_prompt=system_prompt,
                 db_config=db_config,
+                history=history,
                 stop_checker=stop_checker,
             ):
                 logger.info(f"Yielding event: {event.type}")
@@ -208,18 +242,50 @@ def _build_system_prompt(
 
 请遵循以下规则：
 1. 只生成只读 SQL（SELECT、SHOW、DESCRIBE）
-2. 使用 pandas 处理数据
-3. 使用 plotly 生成可视化图表
-4. 用中文回复用户
+2. 用中文回复用户
+3. 如果查询结果适合可视化，在回复末尾添加图表配置（使用 ```chart 代码块）：
+
+```chart
+{
+  "type": "bar",
+  "title": "图表标题",
+  "xKey": "x轴字段名",
+  "yKeys": ["y轴字段名1", "y轴字段名2"]
+}
+```
+
+图表类型选择指南：
+- bar: 比较不同类别的数值（如各地区销售额）
+- line: 展示趋势变化（如月度增长）
+- pie: 展示占比分布（如市场份额）
+- area: 展示累积趋势
+
+注意：只有当数据适合可视化时才添加图表配置，简单的单值查询不需要图表。
 """
         else:
             base_prompt = """You are QueryGPT data analysis assistant, helping users query and analyze database data.
 
 Follow these rules:
 1. Only generate read-only SQL (SELECT, SHOW, DESCRIBE)
-2. Use pandas for data processing
-3. Use plotly for visualization
-4. Reply in English
+2. Reply in English
+3. If query results are suitable for visualization, add chart config at the end (using ```chart code block):
+
+```chart
+{
+  "type": "bar",
+  "title": "Chart Title",
+  "xKey": "x_axis_field",
+  "yKeys": ["y_axis_field1", "y_axis_field2"]
+}
+```
+
+Chart type guide:
+- bar: Compare values across categories
+- line: Show trends over time
+- pie: Show proportions/percentages
+- area: Show cumulative trends
+
+Note: Only add chart config when data is suitable for visualization.
 """
 
         if db_config:
diff --git a/apps/api/app/services/gptme_engine.py b/apps/api/app/services/gptme_engine.py
@@ -38,10 +38,18 @@ async def execute(
         query: str,
         system_prompt: str,
         db_config: dict[str, Any] | None = None,
+        history: list[dict[str, str]] | None = None,
         stop_checker: Callable[[], bool] | None = None,
     ) -> AsyncGenerator[SSEEvent, None]:
         """
         执行查询并流式返回结果
+
+        Args:
+            query: 用户查询
+            system_prompt: 系统提示
+            db_config: 数据库配置
+            history: 对话历史消息列表 [{"role": "user/assistant", "content": "..."}]
+            stop_checker: 停止检查函数
         """
         logger.info("GptmeEngine.execute called", model=self.model, query_preview=query[:50])
 
@@ -62,6 +70,7 @@ async def execute(
                 query=query,
                 system_prompt=system_prompt,
                 db_config=db_config,
+                history=history,
                 stop_checker=stop_checker,
             ):
                 yield event
@@ -74,6 +83,7 @@ async def _execute_with_litellm(
         query: str,
         system_prompt: str,
         db_config: dict[str, Any] | None = None,
+        history: list[dict[str, str]] | None = None,
         stop_checker: Callable[[], bool] | None = None,
     ) -> AsyncGenerator[SSEEvent, None]:
         """使用 LiteLLM 执行查询"""
@@ -88,6 +98,14 @@ async def _execute_with_litellm(
                 db_context = self._build_db_context(db_config)
                 messages.append({"role": "system", "content": db_context})
 
+            # 添加对话历史（不包括当前查询，因为当前查询会单独添加）
+            if history:
+                # Keep only user/assistant entries with content (no de-duplication against the current query)
+                for msg in history:
+                    if msg.get("role") in ("user", "assistant") and msg.get("content"):
+                        messages.append({"role": msg["role"], "content": msg["content"]})
+                logger.info(f"Added {len(history)} history messages to context")
+
             messages.append({"role": "user", "content": query})
 
             yield SSEEvent.progress("generating", "正在生成响应...")
@@ -117,7 +135,6 @@ async def _execute_with_litellm(
             data = None
             rows_count = None
             execution_time = None
-            visualization = None
 
             if sql_code and db_config:
                 yield SSEEvent.progress("executing", "正在执行 SQL 查询...")
@@ -126,26 +143,40 @@ async def _execute_with_litellm(
                 try:
                     data, rows_count = await self._execute_sql(sql_code, db_config)
                     execution_time = time.time() - start_time
-
-                    # 尝试生成可视化
-                    if data and len(data) > 0:
-                        visualization = self._generate_visualization(data, query)
                 except Exception as e:
                     full_content += f"\n\n⚠️ SQL 执行错误: {str(e)}"
 
+            # 从 AI 输出中提取图表配置
+            chart_config = self._extract_chart_config(full_content)
+
+            # 移除图表配置代码块，使输出更干净
+            clean_content = re.sub(r"```chart\s*\n?[\s\S]*?\n?```", "", full_content).strip()
+
             yield SSEEvent.result(
-                content=full_content,
+                content=clean_content,
                 sql=sql_code,
                 data=data,
                 rows_count=rows_count,
                 execution_time=execution_time,
             )
 
-            if visualization:
-                yield SSEEvent.visualization(
-                    chart_type=visualization.get("type", "bar"),
-                    chart_data=visualization.get("data", {}),
-                )
+            # 如果 AI 提供了图表配置且有数据，生成可视化
+            if chart_config and data and len(data) > 0:
+                # 构建图表数据
+                visualization = self._build_chart_from_config(chart_config, data)
+                if visualization:
+                    yield SSEEvent.visualization(
+                        chart_type=visualization.get("type", "bar"),
+                        chart_data=visualization,
+                    )
+            elif data and len(data) > 0:
+                # 如果 AI 没有提供图表配置，使用后备的自动生成逻辑
+                visualization = self._generate_visualization(data, query)
+                if visualization:
+                    yield SSEEvent.visualization(
+                        chart_type=visualization.get("type", "bar"),
+                        chart_data=visualization.get("data", {}),
+                    )
 
         except Exception as e:
             yield SSEEvent.error("LITELLM_ERROR", str(e))
@@ -160,8 +191,66 @@ async def _execute_sql(
         result = db_manager.execute_query(sql, read_only=True)
         return result.data, result.rows_count
 
+    def _build_chart_from_config(
+        self, config: dict, data: list[dict]
+    ) -> dict | None:
+        """Build a chart payload from the AI-provided config and the query rows.
+
+        Args:
+            config: Parsed model output; reads "type", "title", "xKey", "yKeys".
+            data: SQL result rows, one dict per row keyed by column name.
+
+        Returns:
+            Dict with "type", "title", "data" (max 50 points), "xKey", "yKeys";
+            None when the input is unusable or no y column can be determined.
+        """
+        if not data or not data[0] or not isinstance(config, dict):
+            return None
+        columns = list(data[0].keys())
+        # Fall back to the first column when xKey is missing or unknown.
+        x_key = config.get("xKey")
+        if not x_key or x_key not in columns:
+            x_key = columns[0]
+
+        # Fresh list: never mutate config, tolerate null/str, drop unknown columns.
+        requested = config.get("yKeys")
+        if isinstance(requested, str):
+            requested = [requested]
+        elif not isinstance(requested, list):
+            requested = []
+        y_keys = [key for key in requested if key in columns]
+
+        # Nothing usable requested: take other columns numeric in the first row.
+        if not y_keys:
+            for col in columns:
+                if col != x_key:
+                    try:
+                        float(data[0][col])
+                        y_keys.append(col)
+                    except (ValueError, TypeError):
+                        pass  # not numeric: leave this column out
+        if not y_keys:
+            return None
+
+        chart_data = []
+        for row in data[:50]:  # cap the payload at 50 points
+            item = {"name": str(row.get(x_key, ""))}
+            for y_key in y_keys:
+                try:
+                    item[y_key] = float(row.get(y_key, 0))
+                except (ValueError, TypeError):
+                    item[y_key] = 0  # NULL or non-numeric cells plot as zero
+            chart_data.append(item)
+        return {
+            "type": config.get("type", "bar"),
+            "title": config.get("title", ""),
+            "data": chart_data,
+            "xKey": "name",
+            "yKeys": y_keys,
+        }
+
     def _generate_visualization(self, data: list[dict], query: str) -> dict | None:
-        """根据数据和查询生成可视化配置"""
+        """根据数据和查询自动生成可视化配置（后备方案）"""
         if not data or len(data) == 0:
             return None
 
@@ -245,6 +334,36 @@ def _extract_sql(self, content: str) -> str | None:
 
         return None
 
+    def _extract_chart_config(self, content: str) -> dict | None:
+        """Extract the chart config the model emitted in a ```chart fenced block.
+
+        Args:
+            content: Full text of the model response.
+
+        Returns:
+            The first block that parses as a JSON object containing "type";
+            None when no block qualifies.
+        """
+        import json
+
+        # Matches ```chart ... ``` fenced blocks. Model output is untrusted: a
+        # block may hold broken JSON or a JSON value that is not an object, so
+        # every candidate is checked in turn instead of trusting the first one.
+        pattern = r"```chart\s*\n?([\s\S]*?)\n?```"
+        for match in re.finditer(pattern, content or "", re.IGNORECASE):
+            try:
+                config = json.loads(match.group(1).strip())
+            except json.JSONDecodeError as e:
+                logger.warning(f"Failed to parse chart config: {e}")
+                continue
+
+            # Only an object with a "type" field is a usable config.
+            if isinstance(config, dict) and "type" in config:
+                logger.info(f"Extracted chart config: type={config.get('type')}")
+                return config
+
+        return None
+
 
 # 全局引擎实例
 _engine: GptmeEngine | None = None