
Commit 17d41f1

Merge pull request #1736 from ModelEngine-Group/xyc/kb_chunk_preview
2 parents: 7ce348f + b2b16a4

File tree: 15 files changed, +1098 −27 lines

backend/apps/vectordatabase_app.py

Lines changed: 31 additions & 0 deletions
@@ -3,6 +3,7 @@
 from typing import Any, Dict, List, Optional
 
 from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query
+from fastapi.responses import JSONResponse
 
 from consts.model import IndexingResponse
 from nexent.vector_database.base import VectorDatabaseCore
@@ -195,3 +196,33 @@ def health_check(vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)):
         return ElasticSearchService.health_check(vdb_core)
     except Exception as e:
         raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"{str(e)}")
+
+
+@router.post("/{index_name}/chunks")
+def get_index_chunks(
+    index_name: str = Path(...,
+                           description="Name of the index to get chunks from"),
+    page: int = Query(
+        None, description="Page number (1-based) for pagination"),
+    page_size: int = Query(
+        None, description="Number of records per page for pagination"),
+    path_or_url: Optional[str] = Query(
+        None, description="Filter chunks by document path_or_url"),
+    vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)
+):
+    """Get chunks from the specified index, with optional pagination support"""
+    try:
+        result = ElasticSearchService.get_index_chunks(
+            index_name=index_name,
+            page=page,
+            page_size=page_size,
+            path_or_url=path_or_url,
+            vdb_core=vdb_core,
+        )
+        return JSONResponse(status_code=HTTPStatus.OK, content=result)
+    except Exception as e:
+        error_msg = str(e)
+        logger.error(
+            f"Error getting chunks for index '{index_name}': {error_msg}")
+        raise HTTPException(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error getting chunks: {error_msg}")
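For reference, a minimal client-side sketch of calling the new chunk-preview endpoint. The base URL, router prefix, and the absence of auth headers are assumptions for illustration only; adjust them to the actual deployment.

import requests

# Hypothetical base URL; the router's mount point is not shown in this diff.
BASE_URL = "http://localhost:8000"
index_name = "my_knowledge_base"

resp = requests.post(
    f"{BASE_URL}/{index_name}/chunks",
    params={
        "page": 1,        # 1-based page number (optional)
        "page_size": 20,  # records per page (optional)
        # "path_or_url": "docs/guide.pdf",  # optional per-document filter
    },
)
resp.raise_for_status()
payload = resp.json()
print(payload["total"], len(payload["chunks"]))

Note that page, page_size, and path_or_url are query parameters even though the route is a POST, so they go in params rather than the request body.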

backend/services/vectordatabase_service.py

Lines changed: 66 additions & 3 deletions
@@ -35,6 +35,9 @@
 from utils.config_utils import tenant_config_manager, get_model_name_from_config
 from utils.file_management_utils import get_all_files_status, get_file_size
 
+ALLOWED_CHUNK_FIELDS = {"filename",
+                        "path_or_url", "content", "create_time", "id"}
+
 # Configure logging
 logger = logging.getLogger("vectordatabase_service")
 
@@ -572,7 +575,8 @@ async def list_files(
                     'file_size': file_info.get('file_size', 0),
                     'create_time': int(utc_create_timestamp * 1000),
                     'status': "COMPLETED",
-                    'latest_task_id': ''
+                    'latest_task_id': '',
+                    'chunk_count': file_info.get('chunk_count', 0)
                 }
                 files.append(file_data)
 
@@ -630,7 +634,7 @@ async def list_files(
         # Initialize chunks for all files
         for file_data in files:
             file_data['chunks'] = []
-            file_data['chunk_count'] = 0
+            file_data['chunk_count'] = file_data.get('chunk_count', 0)
 
         if msearch_body:
             try:
@@ -667,7 +671,7 @@ async def list_files(
         else:
             for file_data in files:
                 file_data['chunks'] = []
-                file_data['chunk_count'] = 0
+                file_data['chunk_count'] = file_data.get('chunk_count', 0)
 
         return {"files": files}
 
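The two chunk_count changes above keep the value seeded from file_info instead of resetting it to zero before the chunk search runs. A tiny illustrative snippet (sample data made up) of the before/after behavior:

# Before: file_data['chunk_count'] = 0 wiped any pre-populated count.
# After: the seeded value survives, and 0 is only the fallback.
file_data = {"path_or_url": "docs/guide.pdf", "chunk_count": 12}
file_data["chunks"] = []
file_data["chunk_count"] = file_data.get("chunk_count", 0)
assert file_data["chunk_count"] == 12

missing = {"path_or_url": "docs/other.pdf"}
missing["chunks"] = []
missing["chunk_count"] = missing.get("chunk_count", 0)
assert missing["chunk_count"] == 0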

@@ -919,3 +923,62 @@ def get_summary(index_name: str = Path(..., description="Name of the index to ge
         except Exception as e:
             error_msg = f"Failed to get summary: {str(e)}"
             raise Exception(error_msg)
+
+    @staticmethod
+    def get_index_chunks(
+        index_name: str,
+        page: Optional[int] = None,
+        page_size: Optional[int] = None,
+        path_or_url: Optional[str] = None,
+        vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
+    ):
+        """
+        Retrieve chunk records for the specified index with optional pagination.
+
+        Args:
+            index_name: Name of the index to query
+            page: Page number (1-based) when paginating
+            page_size: Page size when paginating
+            path_or_url: Optional document filter
+            vdb_core: VectorDatabaseCore instance
+
+        Returns:
+            Dictionary containing status, chunk list, total, and pagination metadata
+        """
+        try:
+            result = vdb_core.get_index_chunks(
+                index_name,
+                page=page,
+                page_size=page_size,
+                path_or_url=path_or_url,
+            )
+            raw_chunks = result.get("chunks", [])
+            total = result.get("total", len(raw_chunks))
+            result_page = result.get("page", page)
+            result_page_size = result.get("page_size", page_size)
+
+            filtered_chunks: List[Any] = []
+            for chunk in raw_chunks:
+                if isinstance(chunk, dict):
+                    filtered_chunks.append(
+                        {
+                            field: chunk.get(field)
+                            for field in ALLOWED_CHUNK_FIELDS
+                            if field in chunk
+                        }
+                    )
+                else:
+                    filtered_chunks.append(chunk)
+
+            return {
+                "status": "success",
+                "message": f"Successfully retrieved {len(filtered_chunks)} chunks from index {index_name}",
+                "chunks": filtered_chunks,
+                "total": total,
+                "page": result_page,
+                "page_size": result_page_size
+            }
+        except Exception as e:
+            error_msg = f"Error retrieving chunks from index {index_name}: {str(e)}"
+            logger.error(error_msg)
+            raise Exception(error_msg)
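The service layer whitelists chunk fields through ALLOWED_CHUNK_FIELDS before returning them, so callers only ever see filename, path_or_url, content, create_time, and id. A standalone restatement of that filtering step, with made-up chunk data, shows what ends up in the response:

from typing import Any, Dict, List

ALLOWED_CHUNK_FIELDS = {"filename", "path_or_url", "content", "create_time", "id"}

def filter_chunk(chunk: Dict[str, Any]) -> Dict[str, Any]:
    # Keep only the whitelisted keys; anything else (e.g. embeddings) is dropped.
    return {field: chunk[field] for field in ALLOWED_CHUNK_FIELDS if field in chunk}

raw_chunks: List[Dict[str, Any]] = [
    {
        "id": "doc1_0",
        "filename": "guide.pdf",
        "path_or_url": "docs/guide.pdf",
        "content": "First chunk of text...",
        "create_time": 1700000000000,
        "embedding": [0.1, 0.2, 0.3],  # stripped by the whitelist
    }
]

chunks = [filter_chunk(c) for c in raw_chunks]
assert "embedding" not in chunks[0]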
