Commit 319554c

✨ Optimize the CPU_cores x celery_concurrency allocation
2 parents 8a3ee4f + 87ad909 commit 319554c

24 files changed (+1228, -1035 lines)

.gitignore

Lines changed: 1 addition & 2 deletions

@@ -24,8 +24,7 @@ frontend_standalone/
 .pnpm-store/
 frontend-dist/

-backend/assets/clip-vit-base-patch32
-model-assets
+model-assets/

 # Test coverage reports
 *coverage_html

backend/apps/data_process_app.py

Lines changed: 49 additions & 1 deletion

@@ -6,7 +6,7 @@
 import time

 from consts.model import TaskResponse, TaskRequest, BatchTaskResponse, BatchTaskRequest, SimpleTaskStatusResponse, \
-    SimpleTasksListResponse
+    SimpleTasksListResponse, ConvertStateRequest, ConvertStateResponse
 from data_process.utils import get_task_info
 from data_process.tasks import process_and_forward, process_sync
 from services.data_process_service import get_data_process_service
@@ -363,3 +363,51 @@ async def process_text_file(
            status_code=500,
            detail=f"An error occurred while processing the file: {str(e)}"
        )
+
+@router.post("/convert_state", response_model=ConvertStateResponse, status_code=200)
+async def convert_state(request: ConvertStateRequest):
+    """Convert Celery task states to custom frontend state.
+
+    This helper endpoint allows callers that do **not** install Celery dependencies
+    to obtain the corresponding frontend state for a pair of Celery task states.
+    """
+    from celery import states
+
+    def _convert_to_custom_state_inner(process_celery_state: str, forward_celery_state: str) -> str:
+        """Inner helper to keep the original mapping logic in one place."""
+        # Handle failure states first
+        if process_celery_state == states.FAILURE:
+            return "PROCESS_FAILED"
+        if forward_celery_state == states.FAILURE:
+            return "FORWARD_FAILED"
+
+        # Handle completed state - both must be SUCCESS
+        if process_celery_state == states.SUCCESS and forward_celery_state == states.SUCCESS:
+            return "COMPLETED"
+
+        # Handle case where nothing has started
+        if not process_celery_state and not forward_celery_state:
+            return "WAIT_FOR_PROCESSING"
+
+        # Define state mappings
+        forward_state_map = {
+            states.PENDING: "WAIT_FOR_FORWARDING",
+            states.STARTED: "FORWARDING",
+            states.SUCCESS: "COMPLETED",
+            states.FAILURE: "FORWARD_FAILED",
+        }
+        process_state_map = {
+            states.PENDING: "WAIT_FOR_PROCESSING",
+            states.STARTED: "PROCESSING",
+            states.SUCCESS: "WAIT_FOR_FORWARDING",  # Process done, waiting for forward
+            states.FAILURE: "PROCESS_FAILED",
+        }
+
+        if forward_celery_state:
+            return forward_state_map.get(forward_celery_state, "WAIT_FOR_FORWARDING")
+        if process_celery_state:
+            return process_state_map.get(process_celery_state, "WAIT_FOR_PROCESSING")
+        return "WAIT_FOR_PROCESSING"
+
+    state = _convert_to_custom_state_inner(request.process_state or "", request.forward_state or "")
+    return ConvertStateResponse(state=state)
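
For callers of the new endpoint, a minimal client sketch; the base URL, port, and example states are illustrative, while the /tasks/convert_state path and the request/response fields come from this commit:

    import requests

    # Hypothetical service address; point this at wherever data_process_app is served.
    BASE_URL = "http://localhost:5012"

    payload = {
        "process_state": "SUCCESS",  # Celery state of the process task
        "forward_state": "STARTED",  # Celery state of the forward task
    }
    resp = requests.post(f"{BASE_URL}/tasks/convert_state", json=payload, timeout=10)
    resp.raise_for_status()
    print(resp.json())  # per the mapping above, this pair yields {"state": "FORWARDING"}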

backend/apps/elasticsearch_app.py

Lines changed: 3 additions & 4 deletions

@@ -151,12 +151,11 @@ def create_index_documents(
 @router.get("/{index_name}/files")
 async def get_index_files(
     index_name: str = Path(..., description="Name of the index"),
-    search_redis: bool = Query(True, description="Whether to search Redis to get incomplete files"),
     es_core: ElasticSearchCore = Depends(get_es_core)
 ):
     """Get all files from an index, including those that are not yet stored in ES"""
     try:
-        result = await ElasticSearchService.list_files(index_name, include_chunks=False, search_redis=search_redis, es_core=es_core)
+        result = await ElasticSearchService.list_files(index_name, include_chunks=False, es_core=es_core)
         # Transform result to match frontend expectations
         return {
             "status": "success",
@@ -188,7 +187,7 @@ def delete_documents(
             result["redis_cleanup"] = redis_cleanup_result

             # Update the message to include Redis cleanup info
-            original_message = result.get("message", f"Documents deleted successfully")
+            original_message = result.get("message", "Documents deleted successfully")
             result["message"] = (f"{original_message}. "
                                  f"Cleaned up {redis_cleanup_result['total_deleted']} Redis records "
                                  f"({redis_cleanup_result['celery_tasks_deleted']} tasks, "
@@ -205,7 +204,7 @@ def delete_documents(
             logger.warning(f"Redis cleanup failed for document {path_or_url} in index {index_name}: {str(redis_error)}")

             result["redis_cleanup_error"] = str(redis_error)
-            original_message = result.get("message", f"Documents deleted successfully")
+            original_message = result.get("message", "Documents deleted successfully")
             result["message"] = (f"{original_message}, "
                                  f"but Redis cleanup encountered an error: {str(redis_error)}")

backend/consts/model.py

Lines changed: 11 additions & 0 deletions

@@ -276,3 +276,14 @@ class ExportAndImportAgentInfo(BaseModel):
 class AgentImportRequest(BaseModel):
     agent_id: int
     agent_info: ExportAndImportAgentInfo
+
+
+class ConvertStateRequest(BaseModel):
+    """Request schema for /tasks/convert_state endpoint"""
+    process_state: str = ""
+    forward_state: str = ""
+
+
+class ConvertStateResponse(BaseModel):
+    """Response schema for /tasks/convert_state endpoint"""
+    state: str
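
As a hedged sanity check, here are a few (process_state, forward_state) pairs and the frontend state the mapping in convert_state should produce for them; the table is derived by reading the mapping logic above and is not an exhaustive test:

    # Expected outputs of the convert_state mapping for representative inputs.
    EXPECTED = {
        ("", ""): "WAIT_FOR_PROCESSING",
        ("STARTED", ""): "PROCESSING",
        ("SUCCESS", ""): "WAIT_FOR_FORWARDING",
        ("SUCCESS", "STARTED"): "FORWARDING",
        ("SUCCESS", "SUCCESS"): "COMPLETED",
        ("FAILURE", ""): "PROCESS_FAILED",
        ("SUCCESS", "FAILURE"): "FORWARD_FAILED",
    }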

backend/data_process/app.py

Lines changed: 5 additions & 5 deletions

@@ -12,16 +12,16 @@

 # Determine package path dynamically
 import_path = 'data_process.tasks'
-logger.info(f"Using import path: {import_path}")
+logger.debug(f"Using import path: {import_path}")

 REDIS_URL = config.redis_url
 REDIS_BACKEND_URL = config.redis_backend_url

 if not REDIS_URL or not REDIS_BACKEND_URL:
     raise ValueError("FATAL: REDIS_URL or REDIS_BACKEND_URL is not configured. Please check the environment variables in this container.")

-logger.info(f"Broker URL from config: {REDIS_URL}")
-logger.info(f"Backend URL from config: {REDIS_BACKEND_URL}")
+logger.debug(f"Broker URL from config: {REDIS_URL}")
+logger.debug(f"Backend URL from config: {REDIS_BACKEND_URL}")

 # Create Celery app instance
 app = Celery(
@@ -62,8 +62,8 @@
     result_backend_always_retry=True,  # Always retry backend operations
     result_backend_max_retries=10,  # Max retries for backend operations
     task_time_limit=3600,  # 1 hour time limit per task
-    worker_prefetch_multiplier=1,  # Don't prefetch tasks, process one at a time
-    worker_max_tasks_per_child=100,  # Restart worker after 100 tasks
+    worker_prefetch_multiplier=4,  # Allow prefetching for better throughput
+    worker_max_tasks_per_child=1000,  # Reduce restart frequency
     # Important for task chains
     task_acks_late=True,  # Tasks are acknowledged after completion
     task_reject_on_worker_lost=True,  # Tasks are rejected if worker is lost
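
The prefetch and recycling changes interact with worker concurrency, which is what the commit title is about. A rough sizing sketch follows; the core-splitting heuristic and variable names are illustrative and are not configuration defined in this repo:

    import os

    # Pick Celery concurrency from the host's CPU count, leaving each Ray actor
    # its RAY_ACTOR_NUM_CPUS cores for the actual parsing work.
    cpu_cores = os.cpu_count() or 4
    ray_actor_cpus = int(os.getenv("RAY_ACTOR_NUM_CPUS", "2"))
    celery_concurrency = max(1, cpu_cores // ray_actor_cpus)

    # With worker_prefetch_multiplier=4 and task_acks_late=True, each worker process
    # may reserve up to 4 unacknowledged tasks at a time.
    max_reserved_tasks = celery_concurrency * 4
    print(celery_concurrency, max_reserved_tasks)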

backend/data_process/config.py

Lines changed: 1 addition & 1 deletion

@@ -65,7 +65,7 @@ def ray_plasma_directory(self) -> str:
     @property
     def ray_object_store_memory_gb(self) -> float:
         """Ray object store memory limit (GB)"""
-        return float(os.getenv('RAY_OBJECT_STORE_MEMORY_GB', '2.0'))
+        return float(os.getenv('RAY_OBJECT_STORE_MEMORY_GB', '4.0'))

     @property
     def ray_temp_dir(self) -> str:
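
Ray sizes its object store in bytes, so a consumer of this property would convert the gigabyte value roughly as follows. This is a sketch: only the RAY_OBJECT_STORE_MEMORY_GB variable and its new 4.0 default come from this diff, and the call site is presumably wired through RayConfig's init parameters:

    import os

    # Convert the configured GB budget into the byte count ray.init() expects.
    object_store_gb = float(os.getenv("RAY_OBJECT_STORE_MEMORY_GB", "4.0"))
    object_store_bytes = int(object_store_gb * 1024 ** 3)

    # Illustrative call site:
    # ray.init(object_store_memory=object_store_bytes)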

backend/data_process/ray_actors.py

Lines changed: 6 additions & 3 deletions

@@ -7,17 +7,20 @@
 from database.attachment_db import get_file_stream

 logger = logging.getLogger(__name__)
-NUM_CPUS = int(os.getenv("RAY_NUM_CPUS", "1"))
+# This now controls the number of CPUs requested by each DataProcessorRayActor instance.
+# It allows a single file processing task to potentially use more than one core if the
+# underlying processing library (e.g., unstructured) can leverage it.
+RAY_ACTOR_NUM_CPUS = int(os.getenv("RAY_ACTOR_NUM_CPUS", "2"))


-@ray.remote(num_cpus=NUM_CPUS)
+@ray.remote(num_cpus=RAY_ACTOR_NUM_CPUS)
 class DataProcessorRayActor:
     """
     Ray actor for handling data processing tasks.
     Encapsulates the DataProcessCore to be used in a Ray cluster.
     """
     def __init__(self):
-        logger.info(f"Ray starting using {NUM_CPUS} CPUs...")
+        logger.info(f"Ray actor initialized using {RAY_ACTOR_NUM_CPUS} CPU cores...")
         self._processor = DataProcessCore()

     def process_file(self, source: str, chunking_strategy: str, destination: str, task_id: Optional[str] = None, **params) -> List[Dict[str, Any]]:
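
Because every DataProcessorRayActor now reserves RAY_ACTOR_NUM_CPUS logical CPUs, the number of actors Ray schedules concurrently is bounded by the cluster's CPU budget. A back-of-the-envelope sketch, with an illustrative cluster size:

    import os

    cluster_cpus = 16  # illustrative: total CPUs registered with the Ray cluster
    actor_cpus = int(os.getenv("RAY_ACTOR_NUM_CPUS", "2"))

    # Ray admits actors only while their num_cpus requests fit the remaining budget,
    # so at most cluster_cpus // actor_cpus processing actors run at the same time.
    max_concurrent_actors = cluster_cpus // actor_cpus
    print(f"Up to {max_concurrent_actors} file-processing actors can run in parallel")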

backend/data_process/ray_config.py

Lines changed: 39 additions & 49 deletions

@@ -10,6 +10,9 @@

 logger = logging.getLogger(__name__)

+# Forward declaration variable so runtime references succeed before instantiation
+ray_config: Optional["RayConfig"] = None
+

 class RayConfig:
     """Ray configuration manager"""
@@ -82,8 +85,13 @@ def init_ray(self, **kwargs) -> bool:

         params = self.get_init_params(**kwargs)

-        logger.info("Initializing Ray cluster...")
-        logger.debug(f"Ray configuration parameters:")
+        # Get Ray configuration from environment
+        ray_num_cpus = os.environ.get('RAY_NUM_CPUS')
+        num_cpus = int(ray_num_cpus) if ray_num_cpus else None  # None lets Ray decide
+
+        # Log the attempt to initialize
+        logger.debug("Initializing Ray cluster...")
+        logger.debug("Ray configuration parameters:")
         for key, value in params.items():
             if key.startswith('_'):
                 logger.debug(f"  {key}: {value}")
@@ -133,7 +141,7 @@ def connect_to_cluster(self, address: str = "auto") -> bool:
             return True

         except Exception as e:
-            logger.info(f"Failed to connect to Ray cluster: {str(e)}")
+            logger.info(f"Cannot connect to Ray cluster: {str(e)}")
             return False

     def start_local_cluster(self,
@@ -167,54 +175,36 @@ def log_configuration(self):
         logger.debug(f"  ObjectStore memory: {self.object_store_memory_gb} GB")
         logger.debug(f"  Temp directory: {self.temp_dir}")

+    @classmethod
+    def init_ray_for_worker(cls, address: str = "auto") -> bool:
+        """Initialize Ray connection for Celery Worker (class method wrapper)."""
+        logger.info("Initialize Ray connection for Celery Worker...")
+        ray_config.log_configuration()
+        return ray_config.connect_to_cluster(address)

-# Create a global RayConfiguration instance
-ray_config = RayConfig()
+    @classmethod
+    def init_ray_for_service(cls,
+                             num_cpus: Optional[int] = None,
+                             dashboard_port: int = 8265,
+                             try_connect_first: bool = True,
+                             include_dashboard: bool = True) -> bool:
+        """Initialize Ray for data processing service (class method wrapper)."""
+        ray_config.log_configuration()

+        if try_connect_first:
+            # Try to connect to existing cluster first
+            logger.debug("Trying to connect to existing Ray cluster...")
+            if ray_config.connect_to_cluster("auto"):
+                return True
+            logger.info("Starting local cluster...")

-def init_ray_for_worker(address: str = "auto") -> bool:
-    """
-    Initialize Ray connection for Celery Worker
-
-    Args:
-        address: Ray cluster address
-
-    Returns:
-        Whether initialization is successful
-    """
-    logger.info("Initialize Ray connection for Celery Worker...")
-    ray_config.log_configuration()
-
-    return ray_config.connect_to_cluster(address)
-
+        # Start local cluster
+        return ray_config.start_local_cluster(
+            num_cpus=num_cpus,
+            include_dashboard=include_dashboard,
+            dashboard_port=dashboard_port
+        )

-def init_ray_for_service(num_cpus: Optional[int] = None,
-                         dashboard_port: int = 8265,
-                         try_connect_first: bool = True) -> bool:
-    """
-    Initialize Ray for data processing service
-
-    Args:
-        num_cpus: Number of CPU cores
-        dashboard_port: Dashboard port
-        try_connect_first: Whether to try connecting to existing cluster first
-
-    Returns:
-        Whether initialization is successful
-    """
-    ray_config.log_configuration()
-
-    if try_connect_first:
-        # Try to connect to existing cluster first
-        logger.debug("Trying to connect to existing Ray cluster...")
-        if ray_config.connect_to_cluster("auto"):
-            return True
-
-    logger.info("Starting local cluster...")
-
-    # Start local cluster
-    return ray_config.start_local_cluster(
-        num_cpus=num_cpus,
-        dashboard_port=dashboard_port
-    )
+# Create a global RayConfig instance accessible throughout the module
+ray_config = RayConfig()
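
With the module-level helpers promoted to class methods, call sites would look roughly like this. This is a sketch: the diff does not show which modules actually make these calls, and the argument values are illustrative:

    from data_process.ray_config import RayConfig

    # Celery worker side: attach to an already-running Ray cluster.
    if not RayConfig.init_ray_for_worker(address="auto"):
        raise RuntimeError("No reachable Ray cluster for this worker")

    # Data-process service side: reuse an existing cluster if possible, otherwise
    # start a local one with the dashboard enabled on the default port.
    RayConfig.init_ray_for_service(num_cpus=None, dashboard_port=8265,
                                   try_connect_first=True, include_dashboard=True)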

backend/data_process/tasks.py

Lines changed: 8 additions & 28 deletions

@@ -70,7 +70,6 @@ def run_async(coro):
             logger.warning("nest_asyncio not available, creating new thread for async operation")
             # Fallback: run in a new thread
             import concurrent.futures
-            import threading

             def run_in_thread():
                 new_loop = asyncio.new_event_loop()
@@ -97,22 +96,15 @@ def run_in_thread():
 # This will be initialized on first task run by a worker process
 def get_ray_actor() -> Any:
     """
-    Creates or gets a handle to the named DataProcessorRayActor.
-    This is an idempotent operation, safe from race conditions.
+    Creates a new, anonymous DataProcessorRayActor instance for each call.
+    This allows for parallel execution of data processing tasks, with each
+    task running in its own actor.
     """
     with ray_init_lock:
         init_ray_in_worker()
-
-        # Use get_if_exists=True to make this operation idempotent.
-        # This will create the actor if it doesn't exist, or get a handle to it if it does.
-        # This is safe to be called from multiple workers concurrently.
-        actor = DataProcessorRayActor.options(
-            name="data_processor_actor",
-            lifetime="detached",
-            get_if_exists=True
-        ).remote()
+        actor = DataProcessorRayActor.remote()

-        logger.debug("Successfully obtained handle for DataProcessorRayActor.")
+        logger.debug("Successfully created a new DataProcessorRayActor for a task.")
        return actor

 class LoggingTask(Task):
@@ -160,18 +152,6 @@ def process(self, source: str, source_type: str,

    logger.info(f"[{self.request.id}] PROCESS TASK: source_type: {source_type}")

-    self.update_state(
-        state=states.PENDING,
-        meta={
-            'source': source,
-            'source_type': source_type,
-            'index_name': index_name,
-            'original_filename': original_filename,
-            'task_name': 'process',
-            'start_time': start_time
-        }
-    )
-
    self.update_state(
        state=states.STARTED,
        meta={
@@ -473,7 +453,7 @@ async def index_documents():
        es_result = run_async(index_documents())
        logger.debug(f"[{self.request.id}] FORWARD TASK: API response from main_server for source '{original_source}': {es_result}")

-        if isinstance(es_result, dict) and es_result.get("success") == True:
+        if isinstance(es_result, dict) and es_result.get("success"):
            total_indexed = es_result.get("total_indexed", 0)
            total_submitted = es_result.get("total_submitted", len(formatted_chunks))
            logger.debug(f"[{self.request.id}] FORWARD TASK: main_server reported {total_indexed}/{total_submitted} documents indexed successfully for '{original_source}'. Message: {es_result.get('message')}")
@@ -482,7 +462,7 @@ async def index_documents():
                logger.info("Value when raise Exception:")
                logger.info(f"original_source: {original_source}")
                logger.info(f"original_index_name: {original_index_name}")
-                logger.info(f"task_name: forward")
+                logger.info("task_name: forward")
                logger.info(f"source: {original_source}")
                raise Exception(json.dumps({
                    "message": f"Failure reported by main_server. Expected {total_submitted} chunks, indexed {total_indexed} chunks.",
@@ -491,7 +471,7 @@ async def index_documents():
                    "source": original_source,
                    "original_filename": original_filename
                }, ensure_ascii=False))
-        elif isinstance(es_result, dict) and es_result.get("success") == False:
+        elif isinstance(es_result, dict) and not es_result.get("success"):
            error_message = es_result.get("message", "Unknown error from main_server")
            raise Exception(json.dumps({
                "message": f"main_server API error: {error_message}",
