Skip to content

Commit fe5046f

Browse files
committed
✨ Real-time file processing integrated with the Ray data processing module
1 parent 5607c4e commit fe5046f

File tree

11 files changed

+265
-222
lines changed

11 files changed

+265
-222
lines changed

backend/apps/data_process_app.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import logging
22
from contextlib import asynccontextmanager
3-
from fastapi import HTTPException, APIRouter, Form
3+
from fastapi import HTTPException, APIRouter, Form, File, UploadFile
44
import base64
55
import io
6+
import tempfile
7+
import os
68

79
from consts.model import TaskResponse, TaskRequest, BatchTaskResponse, BatchTaskRequest, SimpleTaskStatusResponse, \
810
SimpleTasksListResponse
@@ -292,3 +294,66 @@ async def filter_important_image(
292294
logger.error(f"Error processing image: {str(e)}")
293295
raise HTTPException(
294296
status_code=500, detail=f"Error processing image: {str(e)}")
297+
298+
299+
@router.post("/process_text_file", response_model=dict, status_code=200)
async def process_text_file(
    file: UploadFile = File(...),
    chunking_strategy: str = Form("basic"),
    timeout: int = Form(60)
):
    """
    Transfer the uploaded file to text content.

    This interface is specifically used for file-to-text conversion,
    supporting multiple file formats including PDF, Word, Excel, etc.
    Uses the high-priority processing queue for fast response.

    Parameters:
        file: Uploaded file object
        chunking_strategy: Chunking strategy, default is "basic"
        timeout: Processing timeout (seconds), default is 60 seconds

    Returns:
        JSON object containing the extracted full text content and
        processing metadata

    Raises:
        HTTPException: 500 if reading, saving, or processing the file fails.
    """
    temp_file_path = None
    try:
        logger.info(f"Processing uploaded file: {file.filename}")

        # Persist the upload to a temporary file, keeping the original
        # extension so the downstream parser can detect the format.
        # delete=False because process_sync reads the path after this
        # handle is closed; cleanup happens in the finally block.
        suffix = os.path.splitext(file.filename or "")[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            content = await file.read()
            temp_file.write(content)
            temp_file_path = temp_file.name

        logger.info(f"Saved uploaded file to temporary path: {temp_file_path}")

        result = process_sync(
            source=temp_file_path,
            source_type='file',
            chunking_strategy=chunking_strategy,
            timeout=timeout,
        )
        logger.info(
            f"Successfully processed uploaded file: {file.filename}, "
            f"extracted {result.get('text_length', 0)} characters"
        )

        return {
            "success": True,
            "task_id": result.get("task_id"),
            "filename": file.filename,
            "text": result.get("text", ""),
            "chunks_count": result.get("chunks_count", 0),
            "text_length": result.get("text_length", 0),
            "processing_time": result.get("processing_time", 0),
            "chunking_strategy": chunking_strategy
        }

    except Exception as e:
        logger.exception(f"Error processing uploaded file {file.filename}: {str(e)}")
        # English detail for consistency with the other handlers in this
        # module (e.g. "Error processing image: ...").
        raise HTTPException(
            status_code=500,
            detail=f"Error processing file: {str(e)}"
        )
    finally:
        # Always clean up the temporary file, even on failure.
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.unlink(temp_file_path)
                logger.debug(f"Cleaned up temporary file: {temp_file_path}")
            except OSError as cleanup_err:
                # Best-effort cleanup: log and continue rather than masking
                # the original response/exception.
                logger.warning(
                    f"Failed to clean up temporary file {temp_file_path}: {str(cleanup_err)}"
                )

backend/apps/file_management_app.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from pathlib import Path
55
from typing import List, Optional
66
from io import BytesIO
7+
import requests
8+
import logging
79

810
from utils.auth_utils import get_current_user_id
911
from fastapi import UploadFile, File, HTTPException, Form, APIRouter, Query, Path as PathParam, Body, Header
@@ -16,7 +18,7 @@
1618
from database.attachment_db import (
1719
upload_fileobj, delete_file, get_file_url, list_files
1820
)
19-
from services.file_management_service import file_management_service
21+
logger = logging.getLogger("file_management_app")
2022

2123
# Create upload directory
2224
upload_dir = Path(UPLOAD_FOLDER)
async def process_text_file(query, filename, file_content, tenant_id: str) -> str:
    """
    Process text file, convert to text using external API.

    Uploads the raw bytes to the data-process service's
    /tasks/process_text_file endpoint, then condenses the extracted text
    with convert_long_text_to_text.

    Args:
        query: The user question; used to focus the downstream summary.
        filename: Original name of the uploaded file.
        file_content: Raw file bytes to convert.
        tenant_id: Tenant identifier forwarded to the summarizer.

    Returns:
        A string embedding the summarized file content.

    Raises:
        Exception: If the service URL is unconfigured, the call times out,
            the service is unreachable, or it returns a non-200 response.
    """
    # file_content is byte data, so it must be sent to the API as a
    # multipart file upload.
    data_process_service_url = os.environ.get('DATA_PROCESS_SERVICE')
    if not data_process_service_url:
        # Fail fast with a clear message instead of posting to "None/tasks/..."
        raise Exception("DATA_PROCESS_SERVICE environment variable is not set")
    api_url = f"{data_process_service_url}/tasks/process_text_file"
    logger.info(f"Processing text file {filename} with API: {api_url}")

    # Keep the try block to the network call only, so the Exception raised
    # below for a non-200 response is not caught and double-wrapped by our
    # own generic handler.
    try:
        files = {
            'file': (filename, file_content, 'application/octet-stream')
        }
        data = {
            'chunking_strategy': 'basic',
            'timeout': 60
        }

        response = requests.post(
            api_url,
            files=files,
            data=data,
            timeout=60
        )
    except requests.exceptions.Timeout:
        raise Exception("API call timeout")
    except requests.exceptions.ConnectionError:
        raise Exception(f"Cannot connect to data processing service: {api_url}")
    except Exception as e:
        raise Exception(f"Error processing file: {str(e)}")

    if response.status_code != 200:
        # Prefer the service's JSON "detail" field; fall back to the raw body.
        if response.headers.get('content-type', '').startswith('application/json'):
            error_detail = response.json().get('detail', 'Unknown error')
        else:
            error_detail = response.text
        logger.error(f"File processing failed (status code: {response.status_code}): {error_detail}")
        raise Exception(f"File processing failed (status code: {response.status_code}): {error_detail}")

    result = response.json()
    raw_text = result.get("text", "")
    logger.info(f"File processed successfully: {raw_text[:100]}...")

    text = convert_long_text_to_text(query, raw_text, tenant_id)
    # Include the real filename (the original hard-coded "(unknown)"
    # placeholder looked like an unfinished f-string).
    return f"File ({filename}) content: {text}"
450490

backend/data_process/tasks.py

Lines changed: 42 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Celery tasks for data processing and vector storage
33
"""
44
import logging
5+
import uuid
56
import os
67
import json
78
import time
@@ -611,19 +612,23 @@ def process_sync(self, source: str, source_type: str = "file",
611612
Dict containing the extracted text and metadata
612613
"""
613614
start_time = time.time()
614-
task_id = self.request.id
615+
task_id = self.request.id or str(uuid.uuid4())
615616

616-
# Update task state to PROCESSING
617-
self.update_state(
618-
state=states.STARTED,
619-
meta={
620-
'source': source,
621-
'source_type': source_type,
622-
'task_name': '',
623-
'start_time': start_time,
624-
'sync_mode': True
625-
}
626-
)
617+
# Check if we're in a valid Celery context before updating state
618+
is_celery_context = hasattr(self, 'request') and self.request.id is not None
619+
620+
# Update task state to PROCESSING only if in Celery context
621+
if is_celery_context:
622+
self.update_state(
623+
state=states.STARTED,
624+
meta={
625+
'source': source,
626+
'source_type': source_type,
627+
'task_name': 'process_sync',
628+
'start_time': start_time,
629+
'sync_mode': True
630+
}
631+
)
627632

628633
logger.info(f"Synchronous processing file: {source} with strategy: {chunking_strategy}")
629634

@@ -651,18 +656,19 @@ def process_sync(self, source: str, source_type: str = "file",
651656
# Extract text from chunks
652657
text_content = "\n\n".join([chunk.get("content", "") for chunk in chunks])
653658

654-
# Update task state to COMPLETE
655-
self.update_state(
656-
state=states.SUCCESS,
657-
meta={
658-
'chunks_count': len(chunks),
659-
'processing_time': elapsed_time,
660-
'source': source,
661-
'task_name': '',
662-
'text_length': len(text_content),
663-
'sync_mode': True
664-
}
665-
)
659+
# Update task state to COMPLETE only if in Celery context
660+
if is_celery_context:
661+
self.update_state(
662+
state=states.SUCCESS,
663+
meta={
664+
'chunks_count': len(chunks),
665+
'processing_time': elapsed_time,
666+
'source': source,
667+
'task_name': 'process_sync',
668+
'text_length': len(text_content),
669+
'sync_mode': True
670+
}
671+
)
666672

667673
logger.info(f"Synchronously processed {len(chunks)} chunks from {source} in {elapsed_time:.2f}s")
668674

@@ -679,17 +685,18 @@ def process_sync(self, source: str, source_type: str = "file",
679685
except Exception as e:
680686
logger.error(f"Error synchronously processing file {source}: {str(e)}")
681687

682-
# Update task state to FAILURE with custom metadata
683-
self.update_state(
684-
state=states.FAILURE,
685-
meta={
686-
'source': source,
687-
'task_name': 'process_sync',
688-
'custom_error': str(e),
689-
'sync_mode': True,
690-
'stage': 'sync_processing_failed'
691-
}
692-
)
688+
# Update task state to FAILURE with custom metadata only if in Celery context
689+
if is_celery_context:
690+
self.update_state(
691+
state=states.FAILURE,
692+
meta={
693+
'source': source,
694+
'task_name': 'process_sync',
695+
'custom_error': str(e),
696+
'sync_mode': True,
697+
'stage': 'sync_processing_failed'
698+
}
699+
)
693700

694701
# Re-raise to let Celery handle exception serialization
695702
raise

backend/services/file_management_service.py

Lines changed: 0 additions & 41 deletions
This file was deleted.

backend/utils/attachment_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ def convert_long_text_to_text(query: str, file_context: str, tenant_id: str):
2929
long_text_to_text_model = OpenAILongContextModel(
3030
observer=MessageObserver(),
3131
model_id=get_model_name_from_config(secondary_model_config),
32-
api_base=secondary_model_config.get_config("LLM_SECONDARY_MODEL_URL"),
33-
api_key=secondary_model_config.get_config("LLM_SECONDARY_API_KEY")
32+
api_base=secondary_model_config.get("base_url"),
33+
api_key=secondary_model_config.get("api_key")
3434
)
3535
system_prompt = f"用户提出了一个问题:{query},请从回答这个问题的角度精简、仔细描述一下这段文本,200字以内。"
3636
user_prompt = "请仔细阅读并分析这段文本:"

0 commit comments

Comments
 (0)