Skip to content

Commit b2b16a4

Browse files
committed: ✨ Now chunk preview supports pagination
1 parent 02e7b2f commit b2b16a4

File tree

11 files changed

+564
-180
lines changed

11 files changed

+564
-180
lines changed

backend/apps/vectordatabase_app.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,14 +202,23 @@ def health_check(vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)):
202202
def get_index_chunks(
203203
index_name: str = Path(...,
204204
description="Name of the index to get chunks from"),
205-
batch_size: int = Query(
206-
1000, description="Number of records to fetch per request"),
205+
page: int = Query(
206+
None, description="Page number (1-based) for pagination"),
207+
page_size: int = Query(
208+
None, description="Number of records per page for pagination"),
209+
path_or_url: Optional[str] = Query(
210+
None, description="Filter chunks by document path_or_url"),
207211
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)
208212
):
209-
"""Get all chunks from the specified index"""
213+
"""Get chunks from the specified index, with optional pagination support"""
210214
try:
211215
result = ElasticSearchService.get_index_chunks(
212-
index_name, batch_size, vdb_core)
216+
index_name=index_name,
217+
page=page,
218+
page_size=page_size,
219+
path_or_url=path_or_url,
220+
vdb_core=vdb_core,
221+
)
213222
return JSONResponse(status_code=HTTPStatus.OK, content=result)
214223
except Exception as e:
215224
error_msg = str(e)

backend/services/vectordatabase_service.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,8 @@ async def list_files(
575575
'file_size': file_info.get('file_size', 0),
576576
'create_time': int(utc_create_timestamp * 1000),
577577
'status': "COMPLETED",
578-
'latest_task_id': ''
578+
'latest_task_id': '',
579+
'chunk_count': file_info.get('chunk_count', 0)
579580
}
580581
files.append(file_data)
581582

@@ -633,7 +634,7 @@ async def list_files(
633634
# Initialize chunks for all files
634635
for file_data in files:
635636
file_data['chunks'] = []
636-
file_data['chunk_count'] = 0
637+
file_data['chunk_count'] = file_data.get('chunk_count', 0)
637638

638639
if msearch_body:
639640
try:
@@ -670,7 +671,7 @@ async def list_files(
670671
else:
671672
for file_data in files:
672673
file_data['chunks'] = []
673-
file_data['chunk_count'] = 0
674+
file_data['chunk_count'] = file_data.get('chunk_count', 0)
674675

675676
return {"files": files}
676677

@@ -925,27 +926,39 @@ def get_summary(index_name: str = Path(..., description="Name of the index to ge
925926

926927
@staticmethod
927928
def get_index_chunks(
928-
index_name: str = Path(...,
929-
description="Name of the index to get chunks from"),
930-
batch_size: int = Query(
931-
1000, description="Number of records to fetch per request"),
932-
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)
929+
index_name: str,
930+
page: Optional[int] = None,
931+
page_size: Optional[int] = None,
932+
path_or_url: Optional[str] = None,
933+
vdb_core: VectorDatabaseCore = Depends(get_vector_db_core),
933934
):
934935
"""
935-
Retrieve all chunk records for the specified index.
936+
Retrieve chunk records for the specified index with optional pagination.
936937
937938
Args:
938939
index_name: Name of the index to query
939-
batch_size: Number of records to fetch per request
940+
page: Page number (1-based) when paginating
941+
page_size: Page size when paginating
942+
path_or_url: Optional document filter
940943
vdb_core: VectorDatabaseCore instance
941944
942945
Returns:
943-
Dictionary containing status and list of chunks
946+
Dictionary containing status, chunk list, total, and pagination metadata
944947
"""
945948
try:
946-
chunks = vdb_core.get_index_chunks(index_name, batch_size)
947-
filtered_chunks = []
948-
for chunk in chunks:
949+
result = vdb_core.get_index_chunks(
950+
index_name,
951+
page=page,
952+
page_size=page_size,
953+
path_or_url=path_or_url,
954+
)
955+
raw_chunks = result.get("chunks", [])
956+
total = result.get("total", len(raw_chunks))
957+
result_page = result.get("page", page)
958+
result_page_size = result.get("page_size", page_size)
959+
960+
filtered_chunks: List[Any] = []
961+
for chunk in raw_chunks:
949962
if isinstance(chunk, dict):
950963
filtered_chunks.append(
951964
{
@@ -956,11 +969,14 @@ def get_index_chunks(
956969
)
957970
else:
958971
filtered_chunks.append(chunk)
972+
959973
return {
960974
"status": "success",
961975
"message": f"Successfully retrieved {len(filtered_chunks)} chunks from index {index_name}",
962976
"chunks": filtered_chunks,
963-
"total": len(filtered_chunks)
977+
"total": total,
978+
"page": result_page,
979+
"page_size": result_page_size
964980
}
965981
except Exception as e:
966982
error_msg = f"Error retrieving chunks from index {index_name}: {str(e)}"

0 commit comments

Comments (0)