|
35 | 35 | from utils.config_utils import tenant_config_manager, get_model_name_from_config |
36 | 36 | from utils.file_management_utils import get_all_files_status, get_file_size |
37 | 37 |
|
# Whitelist of chunk fields that may be returned to API clients; any other
# keys on a chunk record are stripped before the response is built.
ALLOWED_CHUNK_FIELDS = {
    "filename",
    "path_or_url",
    "content",
    "create_time",
    "id",
}
38 | 41 | # Configure logging |
39 | 42 | logger = logging.getLogger("vectordatabase_service") |
40 | 43 |
|
@@ -572,7 +575,8 @@ async def list_files( |
572 | 575 | 'file_size': file_info.get('file_size', 0), |
573 | 576 | 'create_time': int(utc_create_timestamp * 1000), |
574 | 577 | 'status': "COMPLETED", |
575 | | - 'latest_task_id': '' |
| 578 | + 'latest_task_id': '', |
| 579 | + 'chunk_count': file_info.get('chunk_count', 0) |
576 | 580 | } |
577 | 581 | files.append(file_data) |
578 | 582 |
|
@@ -630,7 +634,7 @@ async def list_files( |
630 | 634 | # Initialize chunks for all files |
631 | 635 | for file_data in files: |
632 | 636 | file_data['chunks'] = [] |
633 | | - file_data['chunk_count'] = 0 |
| 637 | + file_data['chunk_count'] = file_data.get('chunk_count', 0) |
634 | 638 |
|
635 | 639 | if msearch_body: |
636 | 640 | try: |
@@ -667,7 +671,7 @@ async def list_files( |
667 | 671 | else: |
668 | 672 | for file_data in files: |
669 | 673 | file_data['chunks'] = [] |
670 | | - file_data['chunk_count'] = 0 |
| 674 | + file_data['chunk_count'] = file_data.get('chunk_count', 0) |
671 | 675 |
|
672 | 676 | return {"files": files} |
673 | 677 |
|
@@ -919,3 +923,62 @@ def get_summary(index_name: str = Path(..., description="Name of the index to ge |
919 | 923 | except Exception as e: |
920 | 924 | error_msg = f"Failed to get summary: {str(e)}" |
921 | 925 | raise Exception(error_msg) |
| 926 | + |
| 927 | + @staticmethod |
| 928 | + def get_index_chunks( |
| 929 | + index_name: str, |
| 930 | + page: Optional[int] = None, |
| 931 | + page_size: Optional[int] = None, |
| 932 | + path_or_url: Optional[str] = None, |
| 933 | + vdb_core: VectorDatabaseCore = Depends(get_vector_db_core), |
| 934 | + ): |
| 935 | + """ |
| 936 | + Retrieve chunk records for the specified index with optional pagination. |
| 937 | +
|
| 938 | + Args: |
| 939 | + index_name: Name of the index to query |
| 940 | + page: Page number (1-based) when paginating |
| 941 | + page_size: Page size when paginating |
| 942 | + path_or_url: Optional document filter |
| 943 | + vdb_core: VectorDatabaseCore instance |
| 944 | +
|
| 945 | + Returns: |
| 946 | + Dictionary containing status, chunk list, total, and pagination metadata |
| 947 | + """ |
| 948 | + try: |
| 949 | + result = vdb_core.get_index_chunks( |
| 950 | + index_name, |
| 951 | + page=page, |
| 952 | + page_size=page_size, |
| 953 | + path_or_url=path_or_url, |
| 954 | + ) |
| 955 | + raw_chunks = result.get("chunks", []) |
| 956 | + total = result.get("total", len(raw_chunks)) |
| 957 | + result_page = result.get("page", page) |
| 958 | + result_page_size = result.get("page_size", page_size) |
| 959 | + |
| 960 | + filtered_chunks: List[Any] = [] |
| 961 | + for chunk in raw_chunks: |
| 962 | + if isinstance(chunk, dict): |
| 963 | + filtered_chunks.append( |
| 964 | + { |
| 965 | + field: chunk.get(field) |
| 966 | + for field in ALLOWED_CHUNK_FIELDS |
| 967 | + if field in chunk |
| 968 | + } |
| 969 | + ) |
| 970 | + else: |
| 971 | + filtered_chunks.append(chunk) |
| 972 | + |
| 973 | + return { |
| 974 | + "status": "success", |
| 975 | + "message": f"Successfully retrieved {len(filtered_chunks)} chunks from index {index_name}", |
| 976 | + "chunks": filtered_chunks, |
| 977 | + "total": total, |
| 978 | + "page": result_page, |
| 979 | + "page_size": result_page_size |
| 980 | + } |
| 981 | + except Exception as e: |
| 982 | + error_msg = f"Error retrieving chunks from index {index_name}: {str(e)}" |
| 983 | + logger.error(error_msg) |
| 984 | + raise Exception(error_msg) |
0 commit comments