Skip to content

Commit 40541c5

Browse files
committed
✨ The knowledge base page now supports chunk preview and chunk download
1 parent ffca4a8 commit 40541c5

File tree

10 files changed

+517
-21
lines changed

10 files changed

+517
-21
lines changed

backend/apps/vectordatabase_app.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Any, Dict, List, Optional
44

55
from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query
6+
from fastapi.responses import JSONResponse
67

78
from consts.model import IndexingResponse
89
from nexent.vector_database.base import VectorDatabaseCore
@@ -195,3 +196,24 @@ def health_check(vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)):
195196
return ElasticSearchService.health_check(vdb_core)
196197
except Exception as e:
197198
raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"{str(e)}")
199+
200+
201+
@router.post("/{index_name}/chunks")
def get_index_chunks(
        index_name: str = Path(...,
                               description="Name of the index to get chunks from"),
        batch_size: int = Query(
            1000, ge=1, description="Number of records to fetch per request"),
        vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)
):
    """Return the chunks stored in the specified index.

    Delegates to ``ElasticSearchService.get_index_chunks`` and wraps its
    result in a ``JSONResponse`` with status 200.

    Args:
        index_name: Name of the index to read chunks from (path parameter).
        batch_size: Number of records to fetch per request. Must be at
            least 1; smaller values are rejected by request validation
            instead of being forwarded to the vector database.
        vdb_core: Vector database core injected via ``get_vector_db_core``.

    Raises:
        HTTPException: With status 500 when the service call fails. The
            original exception is chained as the cause.
    """
    try:
        result = ElasticSearchService.get_index_chunks(
            index_name, batch_size, vdb_core)
        return JSONResponse(status_code=HTTPStatus.OK, content=result)
    except Exception as e:
        error_msg = str(e)
        logger.error(
            f"Error getting chunks for index '{index_name}': {error_msg}")
        # Chain the cause so the underlying traceback is not lost.
        raise HTTPException(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            detail=f"Error getting chunks: {error_msg}") from e

backend/services/vectordatabase_service.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@
3535
from utils.config_utils import tenant_config_manager, get_model_name_from_config
3636
from utils.file_management_utils import get_all_files_status, get_file_size
3737

38+
# Allowlist of chunk fields exposed by get_index_chunks; any other key on a
# chunk dict is dropped before the chunk is returned. Frozen so the allowlist
# cannot be mutated at runtime.
ALLOWED_CHUNK_FIELDS = frozenset(
    {"filename", "path_or_url", "content", "create_time", "id"}
)
40+
3841
# Configure logging
3942
logger = logging.getLogger("vectordatabase_service")
4043

@@ -919,3 +922,47 @@ def get_summary(index_name: str = Path(..., description="Name of the index to ge
919922
except Exception as e:
920923
error_msg = f"Failed to get summary: {str(e)}"
921924
raise Exception(error_msg)
925+
926+
@staticmethod
def get_index_chunks(
        index_name: str = Path(...,
                               description="Name of the index to get chunks from"),
        batch_size: int = Query(
            1000, description="Number of records to fetch per request"),
        vdb_core: VectorDatabaseCore = Depends(get_vector_db_core)
):
    """
    Retrieve all chunk records for the specified index.

    Each chunk that is a dict is reduced to the keys listed in
    ALLOWED_CHUNK_FIELDS (keys missing from the chunk are omitted, not
    filled with None). Chunks that are not dicts are passed through
    unchanged.

    NOTE(review): the Path/Query/Depends defaults are FastAPI parameter
    markers; the route handler passes all three arguments explicitly, so
    they look unused here - confirm before relying on them.

    Args:
        index_name: Name of the index to query
        batch_size: Number of records to fetch per request
        vdb_core: VectorDatabaseCore instance

    Returns:
        Dictionary with keys "status", "message", "chunks" and "total"

    Raises:
        Exception: If retrieval or filtering fails; the original error is
            chained as the cause and its text is included in the message.
    """
    try:
        chunks = vdb_core.get_index_chunks(index_name, batch_size)
        filtered_chunks = [
            {
                field: chunk[field]
                for field in ALLOWED_CHUNK_FIELDS
                if field in chunk
            }
            if isinstance(chunk, dict)
            else chunk
            for chunk in chunks
        ]
        return {
            "status": "success",
            "message": f"Successfully retrieved {len(filtered_chunks)} chunks from index {index_name}",
            "chunks": filtered_chunks,
            "total": len(filtered_chunks)
        }
    except Exception as e:
        error_msg = f"Error retrieving chunks from index {index_name}: {str(e)}"
        logger.error(error_msg)
        # Keep the original traceback attached to the re-raised error.
        raise Exception(error_msg) from e
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
"use client";
2+
3+
import React, { useState, useEffect, useMemo, useCallback } from "react";
4+
import { useTranslation } from "react-i18next";
5+
import { Tabs, Card, Badge, Button, App, Spin, Tag } from "antd";
6+
import { Download, ScanText } from "lucide-react";
7+
import { FieldNumberOutlined } from "@ant-design/icons";
8+
import knowledgeBaseService from "@/services/knowledgeBaseService";
9+
import { Document } from "@/types/knowledgeBase";
10+
import log from "@/lib/logger";
11+
import { SETUP_PAGE_CONTAINER } from "@/const/layoutConstants";
12+
13+
/** A single chunk record as rendered by the DocumentChunk component. */
interface Chunk {
  /** Used as the React key and as the base name of the downloaded .txt file. */
  id: string;
  /** Text shown in the chunk card and written to the downloaded file. */
  content: string;
  /** Preferred grouping key when chunks are bucketed per document. */
  path_or_url?: string;
  /** Fallback grouping key when path_or_url is absent. */
  filename?: string;
  create_time?: string;
}
20+
21+
/** Props accepted by the DocumentChunk component. */
interface DocumentChunkProps {
  /** Passed to knowledgeBaseService.previewChunks to load the chunks. */
  knowledgeBaseName: string;
  /** One tab is rendered per entry. */
  documents: Document[];
  /** Called with a document's type; the result is rendered next to its name. */
  getFileIcon: (type: string) => string;
}
26+
27+
// Display names longer than this many characters expand on hover in the tab label.
const FILENAME_TOOLTIP_THRESHOLD = 24;
28+
29+
/**
 * Tabbed preview of the chunks stored in a knowledge base.
 *
 * Renders one tab per document; each tab lists that document's chunks as
 * cards showing the chunk index, character count, content, and a button that
 * downloads the chunk content as a .txt file.
 */
const DocumentChunk: React.FC<DocumentChunkProps> = ({
  knowledgeBaseName,
  documents,
  getFileIcon,
}) => {
  const { t } = useTranslation();
  const { message } = App.useApp();
  const [chunks, setChunks] = useState<Chunk[]>([]);
  const [loading, setLoading] = useState(false);
  const [activeDocumentKey, setActiveDocumentKey] = useState<string>("");
  const [hoveredDocId, setHoveredDocId] = useState<string | null>(null);

  // Group chunks by document (path_or_url, falling back to filename).
  // NOTE(review): tabs look chunks up by doc.id, so this assumes Document.id
  // matches the chunk's path_or_url - confirm against the backend payload.
  const chunksByDocument = useMemo(() => {
    const grouped: Record<string, Chunk[]> = {};
    chunks.forEach((chunk) => {
      const docKey = chunk.path_or_url || chunk.filename || "unknown";
      if (!grouped[docKey]) {
        grouped[docKey] = [];
      }
      grouped[docKey].push(chunk);
    });
    return grouped;
  }, [chunks]);

  // Load chunks when the component mounts or the knowledge base changes.
  useEffect(() => {
    if (!knowledgeBaseName) {
      // A previous request may have been cancelled mid-flight; make sure the
      // spinner does not stay on.
      setLoading(false);
      return;
    }

    // Set by the cleanup so a response for an outdated knowledge base cannot
    // overwrite the state of the current one.
    let cancelled = false;

    const loadChunks = async () => {
      setLoading(true);
      try {
        const loadedChunks = await knowledgeBaseService.previewChunks(
          knowledgeBaseName
        );
        if (!cancelled) {
          setChunks(loadedChunks);
        }
      } catch (error) {
        if (!cancelled) {
          log.error("Failed to load chunks:", error);
          message.error(t("document.chunk.error.loadFailed"));
        }
      } finally {
        if (!cancelled) {
          setLoading(false);
        }
      }
    };

    loadChunks();

    return () => {
      cancelled = true;
    };
  }, [knowledgeBaseName, message, t]);

  // Keep the active tab pointing at an existing document: select the first
  // document initially and whenever the current selection disappears.
  useEffect(() => {
    if (documents.length === 0) {
      return;
    }
    const stillPresent = documents.some((doc) => doc.id === activeDocumentKey);
    if (!stillPresent) {
      setActiveDocumentKey(documents[0].id);
    }
  }, [documents, activeDocumentKey]);

  // Download chunk content as a txt file named after the chunk id.
  const handleDownloadChunk = (chunk: Chunk) => {
    try {
      const content = chunk.content || "";
      const blob = new Blob([content], { type: "text/plain;charset=utf-8" });
      const url = URL.createObjectURL(blob);
      const link = document.createElement("a");
      link.href = url;
      link.download = `${chunk.id}.txt`;
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
      URL.revokeObjectURL(url);
    } catch (error) {
      log.error("Failed to download chunk:", error);
      message.error(t("document.chunk.error.downloadFailed"));
    }
  };

  // Strip the final extension; names without a dot, or starting with their
  // only dot, are returned unchanged.
  const getDisplayName = (name: string): string => {
    const lastDotIndex = name.lastIndexOf(".");
    if (lastDotIndex <= 0) {
      return name;
    }
    return name.substring(0, lastDotIndex);
  };

  const handleLabelMouseEnter = useCallback((docId: string) => {
    setHoveredDocId(docId);
  }, []);

  const handleLabelMouseLeave = useCallback(() => {
    setHoveredDocId(null);
  }, []);

  // Tab label: file icon, (possibly truncated) display name, chunk count badge.
  const renderDocumentLabel = (doc: Document, chunkCount: number) => {
    const displayName = getDisplayName(doc.name || "");
    const shouldExpandOnHover =
      (doc.name || "").length > displayName.length ||
      displayName.length > FILENAME_TOOLTIP_THRESHOLD;

    const isHovered = hoveredDocId === doc.id;
    const widthClass =
      shouldExpandOnHover && isHovered ? "max-w-full" : "max-w-[200px]";

    return (
      <div
        className="flex w-full items-center justify-between gap-2 min-w-0"
        onMouseEnter={() =>
          shouldExpandOnHover ? handleLabelMouseEnter(doc.id) : undefined
        }
        onMouseLeave={shouldExpandOnHover ? handleLabelMouseLeave : undefined}
      >
        <div className="flex items-center gap-1.5 min-w-0">
          <span>{getFileIcon(doc.type)}</span>
          <span
            className={`truncate text-sm font-medium text-gray-800 transition-[max-width] duration-200 ease-out inline-block ${widthClass}`}
          >
            {displayName}
          </span>
        </div>
        <Badge color="#1677ff" showZero count={chunkCount} className="flex-shrink-0" />
      </div>
    );
  };

  // Create tab items for documents.
  const tabItems = documents.map((doc) => {
    const docChunks = chunksByDocument[doc.id] || [];
    const chunkCount = docChunks.length;

    return {
      key: doc.id,
      label: renderDocumentLabel(doc, chunkCount),
      children: (
        <div className="h-full min-h-0 overflow-y-auto p-4">
          {loading ? (
            <div className="flex h-52 items-center justify-center">
              <Spin size="large" />
            </div>
          ) : docChunks.length === 0 ? (
            <div className="rounded-md border border-dashed border-gray-200 p-10 text-center text-sm text-gray-500">
              {t("document.chunk.noChunks")}
            </div>
          ) : (
            <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-3">
              {docChunks.map((chunk, index) => (
                <Card
                  key={chunk.id || index}
                  size="small"
                  className="flex flex-col"
                  headStyle={{ padding: "8px 12px" }}
                  title={
                    <div className="flex items-center justify-between gap-2">
                      <div className="flex flex-wrap gap-1">
                        <Tag className="inline-flex items-center px-1.5 py-0.5 text-xs font-medium bg-gray-200 text-gray-800 border border-gray-200 rounded-md">
                          <FieldNumberOutlined className="text-[12px]" />
                          <span>{index + 1}</span>
                        </Tag>
                        <Tag className="inline-flex items-center gap-1 px-1.5 py-0.5 text-xs font-medium bg-gray-200 text-gray-800 border border-gray-200 rounded-md">
                          <ScanText size={14} />
                          <span>
                            {t("document.chunk.characterCount", {
                              count: (chunk.content || "").length,
                            })}
                          </span>
                        </Tag>
                      </div>
                      <Button
                        type="text"
                        icon={<Download size={16} />}
                        onClick={() => handleDownloadChunk(chunk)}
                        size="small"
                        className="self-center"
                      />
                    </div>
                  }
                >
                  <div className="max-h-[200px] overflow-y-auto break-words whitespace-pre-wrap text-sm">
                    {chunk.content || ""}
                  </div>
                </Card>
              ))}
            </div>
          )}
        </div>
      ),
    };
  });

  // Full-page spinner only for the initial load, before any chunks exist.
  if (loading && chunks.length === 0) {
    return (
      <div className="flex h-full w-full items-center justify-center">
        <Spin size="large" />
      </div>
    );
  }

  return (
    <div className="flex h-full w-full flex-col min-h-0">
      <Tabs
        tabPosition="top"
        activeKey={activeDocumentKey}
        onChange={setActiveDocumentKey}
        items={tabItems}
        className="h-full w-full"
      />
    </div>
  );
};
235+
236+
export default DocumentChunk;
237+

0 commit comments

Comments
 (0)