Increase upload file size limit & fix:_normalize_prompt_field

LuckyYC · LuckyYC · commit dec0519d331d · 2025-08-28T16:19:26.000+08:00
diff --git a/backend/utils/be_config.py b/backend/utils/be_config.py
@@ -25,7 +25,7 @@
     UPLOAD_FOLDER = os.path.join(BASE_DIR, "upload_files")
 
 # File upload security configuration
-MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
+MAX_FILE_SIZE = 10 * 1024 * 1024 * 1024  # 10GB
 MAX_FILENAME_LENGTH = 255
 MAX_TASK_ID_LENGTH = 64
 
diff --git a/backend/utils/security.py b/backend/utils/security.py
@@ -95,7 +95,7 @@ def validate_file_size(file_size: int) -> None:
     """
     if file_size > MAX_FILE_SIZE:
         raise ValueError(
-            f"File size exceeds maximum allowed size of {MAX_FILE_SIZE // (1024*1024)}MB"
+            f"File size exceeds maximum allowed size of {MAX_FILE_SIZE / (1024*1024*1024):.1f}GB"
         )
 
 
diff --git a/frontend/default.conf b/frontend/default.conf
@@ -8,6 +8,11 @@ server {
     error_log /var/log/nginx/error.log warn;
     access_log /var/log/nginx/access.log combined if=$loggable;
 
+    # Handle large file uploads - move to server level
+    client_max_body_size 10g;
+    client_body_timeout 300s;
+    client_header_timeout 300s;
+
     # Security related headers
     add_header X-Frame-Options "SAMEORIGIN";
     add_header X-XSS-Protection "1; mode=block";
@@ -31,9 +36,9 @@ server {
         proxy_cache_bypass $http_upgrade;
 
         # Increase timeout to match backend timeout (180s + buffer)
-        proxy_connect_timeout 120s;
-        proxy_send_timeout 120s;
-        proxy_read_timeout 120s;
+        proxy_connect_timeout 300s;
+        proxy_send_timeout 300s;
+        proxy_read_timeout 300s;
     }
 
     # Block access to upload directory for security
diff --git a/frontend/nginx.conf b/frontend/nginx.conf
@@ -39,7 +39,7 @@ http {
 
     server_tokens off;
     client_body_buffer_size 128k;
-    client_max_body_size 10m;
+    client_max_body_size 10g;
     client_header_buffer_size 1k;
     large_client_header_buffers 4 8k;
 
diff --git a/frontend/src/components/CreateJobForm.tsx b/frontend/src/components/CreateJobForm.tsx
@@ -480,8 +480,8 @@ const CreateJobFormContent: React.FC<CreateJobFormProps> = ({
   const handleDatasetFileUpload = async (options: any) => {
     const { file, onSuccess, onError } = options;
     try {
-      // Validate file size (10MB limit)
-      const maxSize = 10 * 1024 * 1024; // 10MB
+      // Validate file size (10GB limit)
+      const maxSize = 10 * 1024 * 1024 * 1024; // 10GB
       if (file.size > maxSize) {
         message.error(
           t('components.createJobForm.fileSizeExceedsLimitWithSize', {
diff --git a/frontend/src/utils/constants.ts b/frontend/src/utils/constants.ts
@@ -70,7 +70,7 @@ export const FILE_UPLOAD_CONFIG = {
   KEY_FORMATS: '.key,.pem',
   COMBINED_FORMATS: '.pem',
   MAX_COUNT: 1,
-  MAX_SIZE: 10 * 1024 * 1024, // 10MB
+  MAX_SIZE: 10 * 1024 * 1024 * 1024, // 10GB
   ALLOWED_TYPES: ['.json', '.txt', '.csv'],
   CERTIFICATE_TYPES: ['.pem', '.crt', '.key'],
   IMAGE_TYPES: ['.jpg', '.jpeg', '.png', '.gif', '.webp'],
diff --git a/st_engine/engine/locustfile.py b/st_engine/engine/locustfile.py
@@ -475,6 +475,7 @@ def chat_request(self):
         base_request_kwargs, user_prompt = self.request_handler.prepare_request_kwargs(
             prompt_data
         )
+        self.task_logger.debug(f"base_request_kwargs: {base_request_kwargs}")
         if not base_request_kwargs:
             self.task_logger.error(
                 "Failed to generate request arguments. Skipping task."
diff --git a/st_engine/utils/common.py b/st_engine/utils/common.py
@@ -159,11 +159,35 @@ def encode_image(image_path: str) -> str:
 
 # === DATA PROCESSING ===
 def _normalize_prompt_field(prompt: Any) -> str:
-    """Normalize prompt field to string."""
+    """Normalize prompt field to string.
+
+    Supports multiple input formats:
+    - String: returned as-is
+    - Simple list: first element converted to string
+    - Object with 'messages' key: JSON serialized (for chat-like formats)
+    - Other objects: JSON serialized
+    """
     if isinstance(prompt, str):
         return prompt
     elif isinstance(prompt, list) and prompt:
+        # Handle simple list format like ["prompt text"]
         return str(prompt[0])
+    elif isinstance(prompt, dict):
+        # Handle complex object formats
+        try:
+            # Special handling for chat-like formats with messages
+            if "messages" in prompt:
+                # This handles formats like {"messages": [{"role": "user", "content": "..."}]}
+                return json.dumps(prompt, ensure_ascii=False, separators=(",", ":"))
+            else:
+                # Handle other dictionary formats
+                return json.dumps(prompt, ensure_ascii=False, separators=(",", ":"))
+        except (TypeError, ValueError) as e:
+            # Fallback to string representation if JSON serialization fails
+            logger.warning(
+                f"Failed to serialize prompt object to JSON: {e}, using string representation"
+            )
+            return str(prompt)
     else:
         return ""
 
@@ -200,9 +224,18 @@ def _parse_jsonl_line(
         prompt_id = json_obj.get("id", line_num)
 
         # Extract and normalize prompt
-        prompt = _normalize_prompt_field(json_obj.get("prompt"))
+        raw_prompt = json_obj.get("prompt")
+        prompt = _normalize_prompt_field(raw_prompt)
         if not prompt:
-            effective_logger.warning(f"Empty prompt in line {line_num}: {line}")
+            # For debugging, show the type and structure of the raw prompt
+            prompt_info = f"type: {type(raw_prompt).__name__}"
+            if isinstance(raw_prompt, dict) and "messages" in raw_prompt:
+                prompt_info += f", has {len(raw_prompt['messages'])} messages"
+            elif isinstance(raw_prompt, list):
+                prompt_info += f", list length: {len(raw_prompt)}"
+            effective_logger.warning(
+                f"Empty prompt in line {line_num} ({prompt_info}): {line}..."
+            )
             return None
 
         # Handle images

Original file line number	Diff line number	Diff line change
`@@ -95,7 +95,7 @@ def validate_file_size(file_size: int) -> None:`
`95`	`95`	`"""`
`96`	`96`	`if file_size > MAX_FILE_SIZE:`
`97`	`97`	`raise ValueError(`
`98`		`- f"File size exceeds maximum allowed size of {MAX_FILE_SIZE // (1024*1024)}MB"`
	`98`	`+ f"File size exceeds maximum allowed size of {MAX_FILE_SIZE / (1024*1024*1024):.1f}GB"`
`99`	`99`	`)`
`100`	`100`
`101`	`101`
Original file line number	Diff line number	Diff line change
`@@ -475,6 +475,7 @@ def chat_request(self):`
`475`	`475`	`base_request_kwargs, user_prompt = self.request_handler.prepare_request_kwargs(`
`476`	`476`	`prompt_data`
`477`	`477`	`)`
	`478`	`+ self.task_logger.debug(f"base_request_kwargs: {base_request_kwargs}")`
`478`	`479`	`if not base_request_kwargs:`
`479`	`480`	`self.task_logger.error(`
`480`	`481`	`"Failed to generate request arguments. Skipping task."`