Skip to content

Commit 72a6522

Browse files
committed
feat: Enhance knowledge base management with collection renaming, improved chunk retrieval, and error handling
1 parent 4506fa8 commit 72a6522

File tree

10 files changed

+110
-33
lines changed

10 files changed

+110
-33
lines changed

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,14 @@
1616
import com.datamate.rag.indexer.infrastructure.event.DataInsertedEvent;
1717
import com.datamate.rag.indexer.infrastructure.milvus.MilvusService;
1818
import com.datamate.rag.indexer.interfaces.dto.*;
19+
import com.google.gson.JsonObject;
20+
import io.milvus.grpc.QueryResults;
21+
import io.milvus.param.R;
1922
import io.milvus.param.collection.DropCollectionParam;
23+
import io.milvus.param.collection.RenameCollectionParam;
2024
import io.milvus.param.dml.DeleteParam;
25+
import io.milvus.param.dml.QueryParam;
26+
import io.milvus.response.QueryResultsWrapper;
2127
import lombok.RequiredArgsConstructor;
2228
import org.jetbrains.annotations.NotNull;
2329
import org.springframework.beans.BeanUtils;
@@ -26,7 +32,9 @@
2632
import org.springframework.transaction.annotation.Transactional;
2733
import org.springframework.util.StringUtils;
2834

35+
import java.util.Collections;
2936
import java.util.List;
37+
import java.util.Map;
3038
import java.util.Optional;
3139

3240
/**
@@ -44,6 +52,8 @@ public class KnowledgeBaseService {
4452
private final ModelConfigRepository modelConfigRepository;
4553
private final MilvusService milvusService;
4654

55+
private JsonObject jsonObject = new JsonObject();
56+
4757
/**
4858
* 创建知识库
4959
*
@@ -63,10 +73,15 @@ public String create(KnowledgeBaseCreateReq request) {
6373
* @param knowledgeBaseId 知识库 ID
6474
* @param request 知识库更新请求
6575
*/
76+
@Transactional(rollbackFor = Exception.class)
6677
public void update(String knowledgeBaseId, KnowledgeBaseUpdateReq request) {
6778
KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
6879
.orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
6980
if (StringUtils.hasText(request.getName())) {
81+
milvusService.getMilvusClient().renameCollection(RenameCollectionParam.newBuilder()
82+
.withOldCollectionName(knowledgeBase.getName())
83+
.withNewCollectionName(request.getName())
84+
.build());
7085
knowledgeBase.setName(request.getName());
7186
}
7287
if (StringUtils.hasText(request.getDescription())) {
@@ -75,7 +90,13 @@ public void update(String knowledgeBaseId, KnowledgeBaseUpdateReq request) {
7590
knowledgeBaseRepository.updateById(knowledgeBase);
7691
}
7792

78-
@Transactional
93+
94+
/**
95+
* 删除知识库
96+
*
97+
* @param knowledgeBaseId 知识库 ID
98+
*/
99+
@Transactional(rollbackFor = Exception.class)
79100
public void delete(String knowledgeBaseId) {
80101
KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
81102
.orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
@@ -154,7 +175,34 @@ public void deleteFiles(String knowledgeBaseId, DeleteFilesReq request) {
154175
}
155176

156177
public PagedResponse<RagChunk> getChunks(String knowledgeBaseId, String ragFileId, PagingQuery pagingQuery) {
157-
IPage<RagChunk> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
158-
return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
178+
KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
179+
.orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
180+
R<QueryResults> results = milvusService.getMilvusClient().query(QueryParam.newBuilder()
181+
.withCollectionName(knowledgeBase.getName())
182+
.withExpr("metadata[\"rag_file_id\"] == \"" + ragFileId + "\"")
183+
.withOutFields(Collections.singletonList("*"))
184+
.withLimit(Long.valueOf(pagingQuery.getSize()))
185+
.withOffset((long) (pagingQuery.getPage() - 1) * pagingQuery.getSize())
186+
.build());
187+
QueryResultsWrapper wrapper = new QueryResultsWrapper(results.getData());
188+
List<Map<String, Object>> list = wrapper.getRowRecords().stream().map(QueryResultsWrapper.RowRecord::getFieldValues).toList();
189+
190+
List<RagChunk> ragChunks = list.stream().map(item -> new RagChunk(
191+
item.get("id").toString(),
192+
item.get("text").toString(),
193+
item.get("metadata").toString()
194+
)).toList();
195+
196+
// 获取总数
197+
R<QueryResults> countResults = milvusService.getMilvusClient().query(QueryParam.newBuilder()
198+
.withCollectionName(knowledgeBase.getName())
199+
.withExpr("metadata[\"rag_file_id\"] == \"" + ragFileId + "\"")
200+
.withOutFields(Collections.singletonList("count(*)"))
201+
.build());
202+
QueryResultsWrapper countWrapper = new QueryResultsWrapper(countResults.getData());
203+
List<Map<String, Object>> countList = countWrapper.getRowRecords().stream().map(QueryResultsWrapper.RowRecord::getFieldValues).toList();
204+
long totalCount = Long.parseLong(countList.getFirst().get("count(*)").toString());
205+
206+
return PagedResponse.of(ragChunks, pagingQuery.getPage(), totalCount, (int) Math.ceil((double) totalCount / pagingQuery.getSize()));
159207
}
160208
}

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,10 @@
66
* @author dallas
77
* @since 2025-10-29
88
*/
9-
public class RagChunk {
10-
}
9+
10+
public record RagChunk(
11+
String id,
12+
String text,
13+
String metadata
14+
) {
15+
}

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagFile.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,6 @@ public class RagFile extends BaseEntity<String> {
4444
private Map<String, Object> metadata;
4545

4646
private FileStatus status;
47+
48+
private String errMsg;
4749
}

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import com.datamate.rag.indexer.domain.model.RagFile;
1010
import com.datamate.rag.indexer.domain.repository.RagFileRepository;
1111
import com.datamate.rag.indexer.infrastructure.milvus.MilvusService;
12-
import com.datamate.rag.indexer.interfaces.dto.ProcessType;
12+
import com.datamate.rag.indexer.interfaces.dto.AddFilesReq;
1313
import com.google.common.collect.Lists;
1414
import dev.langchain4j.data.document.Document;
1515
import dev.langchain4j.data.document.DocumentParser;
@@ -20,10 +20,7 @@
2020
import dev.langchain4j.data.document.parser.apache.poi.ApachePoiDocumentParser;
2121
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
2222
import dev.langchain4j.data.document.parser.markdown.MarkdownDocumentParser;
23-
import dev.langchain4j.data.document.splitter.DocumentByLineSplitter;
24-
import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter;
25-
import dev.langchain4j.data.document.splitter.DocumentBySentenceSplitter;
26-
import dev.langchain4j.data.document.splitter.DocumentByWordSplitter;
23+
import dev.langchain4j.data.document.splitter.*;
2724
import dev.langchain4j.data.document.transformer.jsoup.HtmlToTextDocumentTransformer;
2825
import dev.langchain4j.data.embedding.Embedding;
2926
import dev.langchain4j.data.segment.TextSegment;
@@ -85,6 +82,7 @@ public void processAfterCommit(DataInsertedEvent event) {
8582
// 处理异常
8683
log.error("Error processing RAG file: {}", ragFile.getFileId(), e);
8784
ragFile.setStatus(FileStatus.PROCESS_FAILED);
85+
ragFile.setErrMsg(e.getMessage());
8886
ragFileRepository.updateById(ragFile);
8987
} finally {
9088
SEMAPHORE.release();
@@ -109,7 +107,7 @@ private void processRagFile(RagFile ragFile, DataInsertedEvent event) {
109107
}
110108
document.metadata().put("rag_file_id", ragFile.getId());
111109
// 使用文档分块器对文档进行分块
112-
DocumentSplitter splitter = documentSplitter(event.addFilesReq().getProcessType());
110+
DocumentSplitter splitter = documentSplitter(event.addFilesReq());
113111
List<TextSegment> split = splitter.split(document);
114112

115113
// 更新分块数量
@@ -145,13 +143,14 @@ public DocumentParser documentParser(String fileType) {
145143
};
146144
}
147145

148-
public DocumentSplitter documentSplitter(ProcessType processType) {
149-
return switch (processType) {
150-
case PARAGRAPH_CHUNK -> new DocumentByParagraphSplitter(1000, 100);
151-
case CHAPTER_CHUNK -> new DocumentByLineSplitter(1000, 100);
152-
case CUSTOM_SEPARATOR_CHUNK -> new DocumentBySentenceSplitter(1000, 100);
153-
case LENGTH_CHUNK -> new DocumentByWordSplitter(1000, 100);
154-
case DEFAULT_CHUNK -> new DocumentByLineSplitter(1000, 100);
146+
public DocumentSplitter documentSplitter(AddFilesReq req) {
147+
return switch (req.getProcessType()) {
148+
case PARAGRAPH_CHUNK -> new DocumentByParagraphSplitter(req.getChunkSize(), req.getOverlapSize());
149+
case SENTENCE_CHUNK -> new DocumentBySentenceSplitter(req.getChunkSize(), req.getOverlapSize());
150+
case LENGTH_CHUNK -> new DocumentByCharacterSplitter(req.getChunkSize(), req.getOverlapSize());
151+
case DEFAULT_CHUNK -> new DocumentByWordSplitter(req.getChunkSize(), req.getOverlapSize());
152+
case CUSTOM_SEPARATOR_CHUNK ->
153+
new DocumentByRegexSplitter(req.getDelimiter(), "", req.getChunkSize(), req.getOverlapSize());
155154
};
156155
}
157-
}
156+
}

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/milvus/MilvusService.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ public EmbeddingStore<TextSegment> embeddingStore(EmbeddingModel embeddingModel,
3636
.build();
3737
}
3838

39+
/**
40+
* 单例模式获取 Milvus 客户端,不依赖 Spring 容器
41+
*
42+
* @return MilvusClient
43+
*/
3944
public MilvusClient getMilvusClient() {
4045
if (milvusClient == null) {
4146
synchronized (this) {

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import org.springframework.web.bind.annotation.*;
1212

1313

14-
1514
/**
1615
* 知识库控制器
1716
*
@@ -124,8 +123,20 @@ public void deleteFile(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
124123
*/
125124
@GetMapping("/{knowledgeBaseId}/files/{ragFileId}")
126125
public PagedResponse<RagChunk> getChunks(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
127-
@PathVariable("ragFileId") String ragFileId,
128-
PagingQuery pagingQuery) {
126+
@PathVariable("ragFileId") String ragFileId,
127+
PagingQuery pagingQuery) {
129128
return knowledgeBaseService.getChunks(knowledgeBaseId, ragFileId, pagingQuery);
130129
}
130+
131+
/**
132+
* 检索知识库内容
133+
*
134+
* @param knowledgeBaseId 知识库 ID
135+
*/
136+
@PostMapping("/{knowledgeBaseId}/retrieve")
137+
public PagedResponse<RagChunk> retrieve(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
138+
@RequestBody @Valid RetrieveReq request,
139+
PagingQuery pagingQuery) {
140+
return knowledgeBaseService.retrieve(knowledgeBaseId, request, pagingQuery);
141+
}
131142
}

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public class AddFilesReq {
1818
private ProcessType processType;
1919
private Integer chunkSize;
2020
private Integer overlapSize;
21-
private String customSeparator;
21+
private String delimiter;
2222
private List<FileInfo> files;
2323

2424
public record FileInfo(String id, String name) {

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,28 @@
77
* @since 2025-10-29
88
*/
99
public enum ProcessType {
10-
/**
11-
* 章节分块
12-
*/
13-
CHAPTER_CHUNK,
1410
/**
1511
* 段落分块
1612
*/
1713
PARAGRAPH_CHUNK,
1814

1915
/**
20-
* 按长度分块
16+
* 按句子分块
2117
*/
22-
LENGTH_CHUNK,
18+
SENTENCE_CHUNK,
2319

2420
/**
25-
* 自定义分割符分块
21+
* 按长度分块,字符串分块
2622
*/
27-
CUSTOM_SEPARATOR_CHUNK,
23+
LENGTH_CHUNK,
2824

2925
/**
30-
* 默认分块
26+
* 默认分块,按单词分块
3127
*/
3228
DEFAULT_CHUNK,
29+
30+
/**
31+
* 自定义分割符分块
32+
*/
33+
CUSTOM_SEPARATOR_CHUNK,
3334
}

backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/KnowledgeBaseErrorCode.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,12 @@ public enum KnowledgeBaseErrorCode implements ErrorCode {
1616
/**
1717
* 知识库不存在
1818
*/
19-
KNOWLEDGE_BASE_NOT_FOUND("knowledge.0001", "知识库不存在");
19+
KNOWLEDGE_BASE_NOT_FOUND("knowledge.0001", "知识库不存在"),
20+
21+
/**
22+
* 文件不存在
23+
*/
24+
RAG_FILE_NOT_FOUND("knowledge.0002", "文件不存在");
2025

2126
private final String code;
2227
private final String message;

scripts/db/rag-management-init.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ create table if not exists t_rag_file
2222
chunk_count INT COMMENT '切片数',
2323
metadata JSON COMMENT '元数据',
2424
status VARCHAR(50) COMMENT '文件状态',
25+
err_msg text NULL COMMENT '错误信息',
2526
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
2627
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
2728
created_by VARCHAR(255) COMMENT '创建者',

0 commit comments

Comments
 (0)