Skip to content

Commit 69ab6fb

Browse files
committed
fix: compare performance & NoneType in handle_general_exception
1 parent 5471404 commit 69ab6fb

File tree

10 files changed

+501
-184
lines changed

10 files changed

+501
-184
lines changed

backend/__init__.py

Whitespace-only changes.

backend/db/init_db.sql

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
SET NAMES utf8mb4;
2+
SET FOREIGN_KEY_CHECKS = 0;
3+
-- 确保数据库存在并使用该数据库
4+
CREATE DATABASE IF NOT EXISTS lmeterx;
5+
USE lmeterx;
6+
7+
-- ----------------------------
8+
-- Table structure for tasks
9+
-- ----------------------------
10+
DROP TABLE IF EXISTS `tasks`;
11+
CREATE TABLE `tasks` (
12+
`id` varchar(36) COLLATE utf8mb4_unicode_ci NOT NULL,
13+
`name` varchar(100) COLLATE utf8mb4_unicode_ci NOT NULL,
14+
`status` varchar(20) COLLATE utf8mb4_unicode_ci DEFAULT 'idle',
15+
`target_host` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
16+
`model` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
17+
`system_prompt` longtext COLLATE utf8mb4_unicode_ci,
18+
`user_prompt` longtext COLLATE utf8mb4_unicode_ci,
19+
`stream_mode` varchar(20) COLLATE utf8mb4_unicode_ci DEFAULT 'True',
20+
`concurrent_users` int(11) DEFAULT '1',
21+
`spawn_rate` int(11) DEFAULT '0',
22+
`duration` int(11) DEFAULT '60',
23+
`chat_type` int(11) DEFAULT '0',
24+
`log_file` longtext COLLATE utf8mb4_unicode_ci,
25+
`result_file` longtext COLLATE utf8mb4_unicode_ci,
26+
`cert_file` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
27+
`key_file` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
28+
`headers` json DEFAULT NULL,
29+
`error_message` text COLLATE utf8mb4_unicode_ci,
30+
`created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
31+
`updated_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
32+
`api_path` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT 'API路径',
33+
PRIMARY KEY (`id`),
34+
KEY `idx_status_created` (`status`,`created_at`),
35+
KEY `idx_updated_at` (`updated_at`),
36+
KEY `idx_name` (`name`),
37+
KEY `idx_status` (`status`),
38+
KEY `idx_created_at` (`created_at`)
39+
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
40+
41+
-- ----------------------------
42+
-- Table structure for task_results
43+
-- ----------------------------
44+
DROP TABLE IF EXISTS `task_results`;
45+
CREATE TABLE `task_results` (
46+
`id` int(11) NOT NULL AUTO_INCREMENT,
47+
`task_id` varchar(36) COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '任务ID',
48+
`metric_type` varchar(36) COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT '指标类型',
49+
`num_requests` int(11) DEFAULT '0' COMMENT '请求总数量',
50+
`num_failures` int(11) DEFAULT '0' COMMENT '请求失败数量',
51+
`avg_latency` float DEFAULT '0' COMMENT '请求平均响应时间',
52+
`min_latency` float DEFAULT '0' COMMENT '请求最小响应时间',
53+
`max_latency` float DEFAULT '0' COMMENT '请求最大响应时间',
54+
`median_latency` float DEFAULT '0' COMMENT '请求中位响应时间',
55+
`p90_latency` float DEFAULT '0' COMMENT '请求90%响应时间',
56+
`rps` float DEFAULT '0' COMMENT '每秒请求数',
57+
`avg_content_length` float DEFAULT '0' COMMENT '平均输出的字符长度',
58+
`completion_tps` float DEFAULT '0' COMMENT '每秒输出的token数量',
59+
`total_tps` float DEFAULT '0' COMMENT '每秒输入输出的总token数量',
60+
`avg_total_tokens_per_req` float DEFAULT '0' COMMENT '每个请求的平均输入输出的总token数量',
61+
`avg_completion_tokens_per_req` float DEFAULT '0' COMMENT '每个请求的平均输出token数量',
62+
`created_at` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
63+
`updated_at` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
64+
PRIMARY KEY (`id`),
65+
KEY `idx_task_id` (`task_id`)
66+
) ENGINE=InnoDB AUTO_INCREMENT=262 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
67+
68+
-- 最后重新启用外键检查
69+
SET FOREIGN_KEY_CHECKS = 1;

backend/model/task.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ class ComparisonMetrics(BaseModel):
249249
model_name: The model name.
250250
concurrent_users: The number of concurrent users.
251251
task_name: The task name.
252-
ttft: Time to first token (min_latency).
252+
ttft: Time to first token (avg_latency in seconds).
253253
total_tps: Total tokens per second.
254254
completion_tps: Completion tokens per second.
255255
avg_total_tpr: Average total tokens per request.

backend/service/task_service.py

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -692,8 +692,20 @@ async def compare_performance_svc(
692692
for task_id in task_ids:
693693
task = tasks[task_id]
694694

695-
# Get TTFT and RPS metrics (from Time_to_first_output_token)
696-
ttft_query = (
695+
# Get TTFT metrics - first try Time_to_first_reasoning_token, then Time_to_first_output_token
696+
ttft_reasoning_query = (
697+
select(TaskResult)
698+
.where(
699+
TaskResult.task_id == task_id,
700+
TaskResult.metric_type == "Time_to_first_reasoning_token",
701+
)
702+
.order_by(TaskResult.created_at.desc())
703+
.limit(1)
704+
)
705+
ttft_reasoning_result = await db.execute(ttft_reasoning_query)
706+
ttft_reasoning_data = ttft_reasoning_result.scalar_one_or_none()
707+
708+
ttft_output_query = (
697709
select(TaskResult)
698710
.where(
699711
TaskResult.task_id == task_id,
@@ -702,8 +714,21 @@ async def compare_performance_svc(
702714
.order_by(TaskResult.created_at.desc())
703715
.limit(1)
704716
)
705-
ttft_result = await db.execute(ttft_query)
706-
ttft_data = ttft_result.scalar_one_or_none()
717+
ttft_output_result = await db.execute(ttft_output_query)
718+
ttft_output_data = ttft_output_result.scalar_one_or_none()
719+
720+
# Get Total_time metrics for RPS
721+
total_time_query = (
722+
select(TaskResult)
723+
.where(
724+
TaskResult.task_id == task_id,
725+
TaskResult.metric_type == "Total_time",
726+
)
727+
.order_by(TaskResult.created_at.desc())
728+
.limit(1)
729+
)
730+
total_time_result = await db.execute(total_time_query)
731+
total_time_data = total_time_result.scalar_one_or_none()
707732

708733
# Get token metrics (from token_metrics)
709734
token_query = (
@@ -719,7 +744,7 @@ async def compare_performance_svc(
719744
token_data = token_result.scalar_one_or_none()
720745

721746
# Check if we have the required data
722-
if not ttft_data and not token_data:
747+
if not ttft_reasoning_data and not ttft_output_data and not token_data:
723748
logger.warning(f"No results found for task {task_id}")
724749
continue
725750

@@ -732,11 +757,20 @@ async def compare_performance_svc(
732757
avg_total_tpr = 0.0
733758
avg_completion_tpr = 0.0
734759

735-
# Extract TTFT and RPS data
736-
if ttft_data:
737-
ttft = ttft_data.min_latency or 0.0 # TTFT as minimum latency
738-
rps = ttft_data.rps or 0.0
739-
avg_response_time = ttft_data.avg_latency or 0.0
760+
# Extract TTFT data - prioritize Time_to_first_reasoning_token, then Time_to_first_output_token
761+
# Use avg_latency and convert from ms to seconds
762+
if ttft_reasoning_data and ttft_reasoning_data.avg_latency:
763+
ttft = ttft_reasoning_data.avg_latency / 1000.0 # Convert ms to seconds
764+
avg_response_time = ttft_reasoning_data.avg_latency
765+
elif ttft_output_data and ttft_output_data.avg_latency:
766+
ttft = ttft_output_data.avg_latency / 1000.0 # Convert ms to seconds
767+
avg_response_time = ttft_output_data.avg_latency
768+
769+
# Extract RPS data - prioritize Total_time, then Time_to_first_output_token
770+
if total_time_data and total_time_data.rps:
771+
rps = total_time_data.rps
772+
elif ttft_output_data and ttft_output_data.rps:
773+
rps = ttft_output_data.rps
740774

741775
# Extract token metrics data
742776
if token_data:
@@ -1041,14 +1075,14 @@ async def test_api_endpoint_svc(request: Request, body: TaskCreateReq):
10411075
"response": None,
10421076
}
10431077
except httpx.TimeoutException as e:
1044-
logger.error(f"Request timeout when testing API endpoint: {e}")
1078+
logger.error(f"Request timeout when testing API endpoint.")
10451079
return {
10461080
"status": "error",
10471081
"error": f"Request timeout: {str(e)}",
10481082
"response": None,
10491083
}
10501084
except httpx.ConnectError as e:
1051-
logger.error(f"Connection error when testing API endpoint: {e}")
1085+
logger.error(f"Connection error when testing API endpoint.")
10521086
return {
10531087
"status": "error",
10541088
"error": f"Connection error: {str(e)}",
@@ -1163,7 +1197,9 @@ async def _handle_streaming_response(response, full_url: str) -> Dict:
11631197
},
11641198
}
11651199
except Exception as stream_error:
1166-
logger.error(f"Error processing stream: {stream_error}")
1200+
logger.error(
1201+
f"Error processing stream: {stream_error}. stream data: {stream_data}"
1202+
)
11671203
return {
11681204
"status": "error",
11691205
"error": f"Streaming data processing error: {str(stream_error)}",

frontend/src/pages/ResultComparison.tsx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -358,9 +358,9 @@ const ResultComparison: React.FC = () => {
358358
title: t('pages.resultComparison.modelName'),
359359
dataIndex: 'model_name',
360360
key: 'model_name',
361-
render: (model: string) => (
362-
<Tag color={getModelColor(model)}>{model}</Tag>
363-
),
361+
// render: (model: string) => (
362+
// <Tag color={getModelColor(model)}>{model}</Tag>
363+
// ),
364364
},
365365
{
366366
title: t('pages.resultComparison.concurrentUsers'),

st_engine/engine/core.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -237,12 +237,6 @@ def parse_field_mapping(field_mapping_str: str) -> FieldMapping:
237237
class CertificateManager:
238238
"""Manages SSL certificate configuration."""
239239

240-
# @staticmethod
241-
# def configure_certificates(
242-
# cert_file: Optional[str], key_file: Optional[str], task_logger
243-
# ) -> Optional[Union[str, Tuple[str, str]]]:
244-
# """Configure client certificate and key."""
245-
# return FilePathUtils.configure_certificates(cert_file, key_file, task_logger)
246240
@staticmethod
247241
def configure_certificates(
248242
cert_file: Optional[str], key_file: Optional[str], task_logger

st_engine/engine/locustfile.py

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ def graceful_signal_handler(signum, frame):
6565
)
6666
return
6767

68-
task_logger.info(f"Received signal {signum}. Initiating graceful shutdown.")
6968
_shutdown_in_progress = True
7069

7170
# Let the default signal handler proceed, but our flag will prevent duplicate User.stop() calls
@@ -391,9 +390,6 @@ def get_next_prompt(self) -> Dict[str, Any]:
391390
self.environment.prompt_queue.put_nowait(prompt_data)
392391
return prompt_data
393392
else:
394-
self.task_logger.warning(
395-
"Prompt queue is empty or not initialized. Using default prompt."
396-
)
397393
return {"id": "default", "prompt": DEFAULT_PROMPT}
398394
except queue.Empty:
399395
self.task_logger.warning("Prompt queue is empty. Using default prompt.")
@@ -419,21 +415,19 @@ def _log_token_counts(
419415
model_name = global_config.model_name or ""
420416
system_prompt = global_config.system_prompt or ""
421417

422-
# Validate inputs
423418
user_prompt = user_prompt or ""
424419
reasoning_content = reasoning_content or ""
425420
model_output = model_output or ""
426421

427422
# Prefer usage_tokens if available and valid
428-
completion_tokens = None
429-
total_tokens = None
430-
431-
if usage_tokens:
432-
completion_tokens = usage_tokens.get("completion_tokens")
433-
total_tokens = usage_tokens.get("total_tokens")
423+
completion_tokens = 0
424+
total_tokens = 0
434425

435426
# Fallback: manual counting if completion_tokens and total_tokens are missing
436-
if completion_tokens is None or total_tokens is None:
427+
if usage_tokens and isinstance(usage_tokens, dict):
428+
completion_tokens = usage_tokens.get("completion_tokens", 0) or 0
429+
total_tokens = usage_tokens.get("total_tokens", 0) or 0
430+
else:
437431
system_tokens = (
438432
count_tokens(system_prompt, model_name) if system_prompt else 0
439433
)
@@ -453,9 +447,17 @@ def _log_token_counts(
453447
total_tokens = system_tokens + user_tokens + completion_tokens
454448

455449
# Ensure integer and log - only if tokens are not None and positive
456-
if completion_tokens is not None and completion_tokens > 0:
450+
if (
451+
completion_tokens
452+
and isinstance(completion_tokens, (int, float))
453+
and completion_tokens > 0
454+
):
457455
global_task_queue["completion_tokens_queue"].put(int(completion_tokens))
458-
if total_tokens is not None and total_tokens > 0:
456+
if (
457+
total_tokens
458+
and isinstance(total_tokens, (int, float))
459+
and total_tokens > 0
460+
):
459461
global_task_queue["all_tokens_queue"].put(int(total_tokens))
460462

461463
except Exception as e:
@@ -465,14 +467,14 @@ def _log_token_counts(
465467
def chat_request(self):
466468
"""Main Locust task that executes a single chat request."""
467469
global_config = get_global_config()
468-
469-
prompt_data = self.get_next_prompt()
470-
470+
# Check if we need dataset mode (avoid unnecessary queue operations)
471+
needs_dataset = bool(
472+
global_config.test_data and global_config.test_data.strip()
473+
)
474+
prompt_data = self.get_next_prompt() if needs_dataset else None
471475
base_request_kwargs, user_prompt = self.request_handler.prepare_request_kwargs(
472476
prompt_data
473477
)
474-
# self.task_logger.info(f"base_request_kwargs: {base_request_kwargs}")
475-
476478
if not base_request_kwargs:
477479
self.task_logger.error(
478480
"Failed to generate request arguments. Skipping task."
@@ -490,7 +492,6 @@ def chat_request(self):
490492
if base_request_kwargs
491493
else "failure"
492494
)
493-
494495
try:
495496
if global_config.stream_mode:
496497
reasoning_content, model_output = (
@@ -505,17 +506,29 @@ def chat_request(self):
505506
)
506507
)
507508
except Exception as e:
508-
self.task_logger.error(
509-
f"Unhandled exception in chat_request: {e}", exc_info=True
510-
)
511-
# Record the failure event for unhandled exceptions
512-
response_time = (time.time() - start_time) * 1000
509+
self.task_logger.error(f"Unhandled exception in chat_request: {e}")
510+
# Record the failure event for unhandled exceptions with enhanced context
511+
try:
512+
response_time = (
513+
(time.time() - start_time) * 1000 if start_time is not None else 0
514+
)
515+
except Exception:
516+
response_time = 0
517+
513518
ErrorHandler.handle_general_exception(
514519
f"Unhandled exception in chat_request: {e}",
515520
self.task_logger,
516521
response=None,
517522
response_time=response_time,
518-
request_name=request_name,
523+
additional_context={
524+
"stream_mode": global_config.stream_mode,
525+
"api_path": global_config.api_path,
526+
"prompt_preview": (
527+
str(user_prompt)[:100] if user_prompt else "No prompt"
528+
),
529+
"task_id": global_config.task_id,
530+
"request_name": request_name,
531+
},
519532
)
520533

521534
if reasoning_content or model_output or usage_tokens:

0 commit comments

Comments (0)