Skip to content

Commit 145c154

Browse files
authored
feat: Integrate Milvus service for enhanced knowledge base management and file deletion (#88)
* feat: Refactor system parameter management with new data structure and update logic
* fix: 修复知识库相关问题
1 parent e300d13 commit 145c154

File tree

11 files changed

+132
-54
lines changed

11 files changed

+132
-54
lines changed

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.baomidou.mybatisplus.core.metadata.IPage;
44
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
5+
import com.datamate.common.setting.domain.repository.ModelConfigRepository;
56
import com.datamate.rag.indexer.domain.model.FileStatus;
67
import com.datamate.rag.indexer.domain.model.KnowledgeBase;
78
import com.datamate.rag.indexer.domain.model.RagChunk;
@@ -15,6 +16,7 @@
1516
import com.datamate.common.interfaces.PagingQuery;
1617
import com.datamate.rag.indexer.interfaces.dto.*;
1718
import lombok.RequiredArgsConstructor;
19+
import org.jetbrains.annotations.NotNull;
1820
import org.springframework.beans.BeanUtils;
1921
import org.springframework.context.ApplicationEventPublisher;
2022
import org.springframework.stereotype.Service;
@@ -36,6 +38,7 @@ public class KnowledgeBaseService {
3638
private final KnowledgeBaseRepository knowledgeBaseRepository;
3739
private final RagFileRepository ragFileRepository;
3840
private final ApplicationEventPublisher eventPublisher;
41+
private final ModelConfigRepository modelConfigRepository;
3942

4043

4144
/**
@@ -75,15 +78,39 @@ public void delete(String knowledgeBaseId) {
7578
// TODO: 删除知识库关联的所有文档
7679
}
7780

78-
public KnowledgeBase getById(String knowledgeBaseId) {
79-
return Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
81+
public KnowledgeBaseResp getById(String knowledgeBaseId) {
82+
KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
8083
.orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
84+
KnowledgeBaseResp resp = getKnowledgeBaseResp(knowledgeBase);
85+
resp.setEmbedding(modelConfigRepository.getById(knowledgeBase.getEmbeddingModel()));
86+
resp.setChat(modelConfigRepository.getById(knowledgeBase.getChatModel()));
87+
return resp;
8188
}
8289

83-
public PagedResponse<KnowledgeBase> list(KnowledgeBaseQueryReq request) {
90+
/**
 * Builds the response view of a knowledge base.
 *
 * Copies the entity's properties into a new {@link KnowledgeBaseResp} and fills in the
 * file count and the total chunk count. The embedding/chat model fields are not set here.
 *
 * NOTE(review): every call loads all files of the knowledge base just to count them, and
 * list() invokes this once per record on the page — consider an aggregate query if this
 * becomes a hot path.
 *
 * @param knowledgeBase the knowledge base entity to convert
 * @return the populated response object
 */
@NotNull
91+
private KnowledgeBaseResp getKnowledgeBaseResp(KnowledgeBase knowledgeBase) {
92+
KnowledgeBaseResp resp = new KnowledgeBaseResp();
93+
BeanUtils.copyProperties(knowledgeBase, resp);
94+
95+
// Fetch all files belonging to this knowledge base
96+
List<RagFile> files = ragFileRepository.findAllByKnowledgeBaseId(knowledgeBase.getId());
97+
resp.setFileCount((long) files.size());
98+
99+
// Compute the total chunk count; a file whose chunk count is null contributes 0
100+
long totalChunkCount = files.stream()
101+
.mapToLong(file -> file.getChunkCount() != null ? file.getChunkCount() : 0)
102+
.sum();
103+
resp.setChunkCount(totalChunkCount);
104+
return resp;
105+
}
106+
107+
public PagedResponse<KnowledgeBaseResp> list(KnowledgeBaseQueryReq request) {
84108
IPage<KnowledgeBase> page = new Page<>(request.getPage(), request.getSize());
85109
page = knowledgeBaseRepository.page(page, request);
86-
return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
110+
111+
// 将 KnowledgeBase 转换为 KnowledgeBaseResp,并计算 fileCount 和 chunkCount
112+
List<KnowledgeBaseResp> respList = page.getRecords().stream().map(this::getKnowledgeBaseResp).toList();
113+
return PagedResponse.of(respList, page.getCurrent(), page.getTotal(), page.getPages());
87114
}
88115

89116
@Transactional(rollbackFor = Exception.class)
@@ -104,7 +131,8 @@ public void addFiles(AddFilesReq request) {
104131

105132
public PagedResponse<RagFile> listFiles(String knowledgeBaseId, RagFileReq request) {
106133
IPage<RagFile> page = new Page<>(request.getPage(), request.getSize());
107-
page = ragFileRepository.page(page);
134+
request.setKnowledgeBaseId(knowledgeBaseId);
135+
page = ragFileRepository.page(page, request);
108136
return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
109137
}
110138

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package com.datamate.rag.indexer.domain.repository;
22

3+
import com.baomidou.mybatisplus.core.metadata.IPage;
34
import com.baomidou.mybatisplus.extension.repository.IRepository;
45
import com.datamate.rag.indexer.domain.model.RagFile;
6+
import com.datamate.rag.indexer.interfaces.dto.RagFileReq;
57

68
import java.util.List;
79

@@ -14,5 +16,9 @@
1416
public interface RagFileRepository extends IRepository<RagFile> {
1517
void removeByKnowledgeBaseId(String knowledgeBaseId);
1618

17-
List<RagFile> findByKnowledgeBaseId(String knowledgeBaseId);
19+
List<RagFile> findNotSuccessByKnowledgeBaseId(String knowledgeBaseId);
20+
21+
List<RagFile> findAllByKnowledgeBaseId(String knowledgeBaseId);
22+
23+
IPage<RagFile> page(IPage<RagFile> page, RagFileReq request);
1824
}

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
import com.datamate.common.setting.domain.entity.ModelConfig;
44
import com.datamate.common.setting.domain.repository.ModelConfigRepository;
55
import com.datamate.common.setting.infrastructure.client.ModelClient;
6+
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
7+
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
68
import com.datamate.rag.indexer.domain.model.FileStatus;
79
import com.datamate.rag.indexer.domain.model.RagFile;
810
import com.datamate.rag.indexer.domain.repository.RagFileRepository;
911
import com.datamate.rag.indexer.interfaces.dto.ProcessType;
10-
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
11-
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
12+
import com.google.common.collect.Lists;
1213
import dev.langchain4j.data.document.Document;
1314
import dev.langchain4j.data.document.DocumentParser;
1415
import dev.langchain4j.data.document.DocumentSplitter;
@@ -18,7 +19,10 @@
1819
import dev.langchain4j.data.document.parser.apache.poi.ApachePoiDocumentParser;
1920
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
2021
import dev.langchain4j.data.document.parser.markdown.MarkdownDocumentParser;
21-
import dev.langchain4j.data.document.splitter.*;
22+
import dev.langchain4j.data.document.splitter.DocumentByLineSplitter;
23+
import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter;
24+
import dev.langchain4j.data.document.splitter.DocumentBySentenceSplitter;
25+
import dev.langchain4j.data.document.splitter.DocumentByWordSplitter;
2226
import dev.langchain4j.data.document.transformer.jsoup.HtmlToTextDocumentTransformer;
2327
import dev.langchain4j.data.embedding.Embedding;
2428
import dev.langchain4j.data.segment.TextSegment;
@@ -68,7 +72,7 @@ public class RagEtlService {
6872
@TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT)
6973
public void processAfterCommit(DataInsertedEvent event) {
7074
// 执行 RAG 处理流水线
71-
List<RagFile> ragFiles = ragFileRepository.findByKnowledgeBaseId(event.knowledgeBase().getId());
75+
List<RagFile> ragFiles = ragFileRepository.findNotSuccessByKnowledgeBaseId(event.knowledgeBase().getId());
7276

7377
ragFiles.forEach(ragFile -> {
7478
try {
@@ -108,6 +112,7 @@ private void processRagFile(RagFile ragFile, DataInsertedEvent event) {
108112
if (Arrays.asList("html", "htm").contains(file.getFileType().toLowerCase())) {
109113
document = new HtmlToTextDocumentTransformer().transform(document);
110114
}
115+
document.metadata().put("fileId", ragFile.getFileId());
111116
// 使用文档分块器对文档进行分块
112117
DocumentSplitter splitter = documentSplitter(event.addFilesReq().getProcessType());
113118
List<TextSegment> split = splitter.split(document);
@@ -120,9 +125,12 @@ private void processRagFile(RagFile ragFile, DataInsertedEvent event) {
120125
ModelConfig model = modelConfigRepository.getById(event.knowledgeBase().getEmbeddingModel());
121126
EmbeddingModel embeddingModel = ModelClient.invokeEmbeddingModel(model);
122127
// 调用嵌入模型获取嵌入向量
123-
List<Embedding> content = embeddingModel.embedAll(split).content();
124-
// 存储嵌入向量到 Milvus
125-
embeddingStore(embeddingModel, event.knowledgeBase().getName()).addAll(content, split);
128+
129+
Lists.partition(split, 20).forEach(partition -> {
130+
List<Embedding> content = embeddingModel.embedAll(partition).content();
131+
// 存储嵌入向量到 Milvus
132+
embeddingStore(embeddingModel, event.knowledgeBase().getName()).addAll(content, partition);
133+
});
126134
}
127135

128136
/**

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
package com.datamate.rag.indexer.infrastructure.persistence.impl;
22

3+
import com.baomidou.mybatisplus.core.metadata.IPage;
34
import com.baomidou.mybatisplus.extension.repository.CrudRepository;
45
import com.datamate.rag.indexer.domain.model.FileStatus;
56
import com.datamate.rag.indexer.domain.model.RagFile;
67
import com.datamate.rag.indexer.domain.repository.RagFileRepository;
78
import com.datamate.rag.indexer.infrastructure.persistence.mapper.RagFileMapper;
9+
import com.datamate.rag.indexer.interfaces.dto.RagFileReq;
810
import org.springframework.stereotype.Repository;
11+
import org.springframework.util.StringUtils;
912

1013
import java.util.List;
1114

@@ -23,10 +26,25 @@ public void removeByKnowledgeBaseId(String knowledgeBaseId) {
2326
}
2427

2528
@Override
26-
public List<RagFile> findByKnowledgeBaseId(String knowledgeBaseId) {
29+
public List<RagFile> findNotSuccessByKnowledgeBaseId(String knowledgeBaseId) {
2730
return lambdaQuery()
2831
.eq(RagFile::getKnowledgeBaseId, knowledgeBaseId)
2932
.in(RagFile::getStatus, FileStatus.UNPROCESSED, FileStatus.PROCESS_FAILED)
3033
.list();
3134
}
35+
36+
/**
 * Returns every RAG file whose knowledge base id equals the given id, with no status filter.
 *
 * @param knowledgeBaseId id of the knowledge base whose files are queried
 * @return the matching files
 */
@Override
37+
public List<RagFile> findAllByKnowledgeBaseId(String knowledgeBaseId) {
38+
return lambdaQuery()
39+
.eq(RagFile::getKnowledgeBaseId, knowledgeBaseId)
40+
.list();
41+
}
42+
43+
/**
 * Pages the RAG files of one knowledge base, optionally filtered by file name.
 *
 * @param page    paging parameters passed to the query
 * @param request carries the knowledge base id (equality filter) and an optional file name
 * @return the page produced by the query
 */
@Override
44+
public IPage<RagFile> page(IPage<RagFile> page, RagFileReq request) {
45+
return lambdaQuery()
46+
.eq(RagFile::getKnowledgeBaseId, request.getKnowledgeBaseId())
47+
// The LIKE condition on file name is only added when the request's file name has text
.like(StringUtils.hasText(request.getFileName()), RagFile::getFileName, request.getFileName())
48+
.page(page);
49+
}
3250
}

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
package com.datamate.rag.indexer.interfaces;
22

3+
import com.datamate.common.interfaces.PagedResponse;
4+
import com.datamate.common.interfaces.PagingQuery;
35
import com.datamate.rag.indexer.application.KnowledgeBaseService;
4-
import com.datamate.rag.indexer.domain.model.KnowledgeBase;
56
import com.datamate.rag.indexer.domain.model.RagChunk;
67
import com.datamate.rag.indexer.domain.model.RagFile;
7-
import com.datamate.common.interfaces.PagedResponse;
8-
import com.datamate.common.interfaces.PagingQuery;
98
import com.datamate.rag.indexer.interfaces.dto.*;
109
import jakarta.validation.Valid;
1110
import lombok.RequiredArgsConstructor;
@@ -65,7 +64,7 @@ public void delete(@PathVariable("knowledgeBaseId") String knowledgeBaseId) {
6564
* @return 知识库
6665
*/
6766
@GetMapping("/{knowledgeBaseId}")
68-
public KnowledgeBase get(@PathVariable("knowledgeBaseId") String knowledgeBaseId) {
67+
public KnowledgeBaseResp get(@PathVariable("knowledgeBaseId") String knowledgeBaseId) {
6968
return knowledgeBaseService.getById(knowledgeBaseId);
7069
}
7170

@@ -75,7 +74,7 @@ public KnowledgeBase get(@PathVariable("knowledgeBaseId") String knowledgeBaseId
7574
* @return 知识库列表
7675
*/
7776
@PostMapping("/list")
78-
public PagedResponse<KnowledgeBase> list(@RequestBody @Valid KnowledgeBaseQueryReq request) {
77+
public PagedResponse<KnowledgeBaseResp> list(@RequestBody @Valid KnowledgeBaseQueryReq request) {
7978
return knowledgeBaseService.list(request);
8079
}
8180

@@ -129,4 +128,4 @@ public PagedResponse<RagChunk> getChunks(@PathVariable("knowledgeBaseId") String
129128
PagingQuery pagingQuery) {
130129
return knowledgeBaseService.getChunks(knowledgeBaseId, ragFileId, pagingQuery);
131130
}
132-
}
131+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package com.datamate.rag.indexer.interfaces.dto;
2+
3+
import com.datamate.common.setting.domain.entity.ModelConfig;
4+
import com.datamate.rag.indexer.domain.model.KnowledgeBase;
5+
import lombok.Getter;
6+
import lombok.Setter;
7+
8+
/**
9+
* Knowledge base response entity: a {@link KnowledgeBase} extended with file/chunk
* statistics and model configuration objects.
10+
*
11+
* @author dallas
12+
* @since 2025-11-17
13+
*/
14+
@Setter
15+
@Getter
16+
public class KnowledgeBaseResp extends KnowledgeBase {
17+
// Number of files in the knowledge base.
private Long fileCount;
18+
// Sum of the per-file chunk counts.
private Long chunkCount;
19+
// Configuration of the embedding model; in the service code shown, only getById sets it.
private ModelConfig embedding;
20+
// Configuration of the chat model; in the service code shown, only getById sets it.
private ModelConfig chat;
21+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
package com.datamate.rag.indexer.interfaces.dto;
22

33
import com.datamate.common.interfaces.PagingQuery;
4+
import lombok.Getter;
5+
import lombok.Setter;
46

57
/**
68
* RAG file request: paging query for the files of a knowledge base.
79
*
810
* @author dallas
911
* @since 2025-10-29
1012
*/
13+
@Setter
14+
@Getter
1115
public class RagFileReq extends PagingQuery {
1216
// Optional file-name filter; the repository applies it as a LIKE match when it has text.
private String fileName;
17+
// Knowledge base to query; KnowledgeBaseService#listFiles sets it from its knowledgeBaseId argument.
private String knowledgeBaseId;
1318
}

frontend/src/pages/KnowledgeBase/Home/KnowledgeBasePage.tsx

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ export default function KnowledgeBasePage() {
2828
handleFiltersChange,
2929
} = useFetchData<KnowledgeBaseItem>(
3030
queryKnowledgeBasesUsingPost,
31-
mapKnowledgeBase
31+
(kb) => mapKnowledgeBase(kb, false) // 在首页不显示索引模型和文本理解模型字段
3232
);
3333

3434
const handleDeleteKB = async (kb: KnowledgeBaseItem) => {
@@ -84,20 +84,6 @@ export default function KnowledgeBasePage() {
8484
</Button>
8585
),
8686
},
87-
{
88-
title: "向量数据库",
89-
dataIndex: "embeddingModel",
90-
key: "embeddingModel",
91-
width: 150,
92-
ellipsis: true,
93-
},
94-
{
95-
title: "大语言模型",
96-
dataIndex: "chatModel",
97-
key: "chatModel",
98-
width: 150,
99-
ellipsis: true,
100-
},
10187
{
10288
title: "创建时间",
10389
dataIndex: "createdAt",
@@ -192,4 +178,4 @@ export default function KnowledgeBasePage() {
192178
)}
193179
</div>
194180
);
195-
}
181+
}

frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ export default function CreateKnowledgeBase({
129129
<Select
130130
placeholder="请选择索引模型"
131131
options={embeddingModelOptions}
132+
disabled={isEdit} // 编辑模式下禁用索引模型修改
132133
/>
133134
</Form.Item>
134135
<Form.Item
@@ -145,4 +146,4 @@ export default function CreateKnowledgeBase({
145146
</Modal>
146147
</>
147148
);
148-
}
149+
}

frontend/src/pages/KnowledgeBase/knowledge-base.const.tsx

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -63,35 +63,37 @@ export const KBTypeMap = {
6363
},
6464
};
6565

66-
export function mapKnowledgeBase(kb: KnowledgeBaseItem): KnowledgeBaseItem {
66+
export function mapKnowledgeBase(kb: KnowledgeBaseItem, showModelFields: boolean = true): KnowledgeBaseItem {
6767
return {
6868
...kb,
6969
icon: <BookOpenText className="w-full h-full" />,
7070
description: kb.description,
7171
statistics: [
72-
{
73-
label: "索引模型",
74-
key: "embeddingModel",
75-
icon: <VectorSquare className="w-4 h-4 text-blue-500" />,
76-
value: kb.embeddingModel,
77-
},
78-
{
79-
label: "文本理解模型",
80-
key: "chatModel",
81-
icon: <BookType className="w-4 h-4 text-blue-500" />,
82-
value: kb.chatModel,
83-
},
72+
...(showModelFields ? [
73+
{
74+
label: "索引模型",
75+
key: "embeddingModel",
76+
icon: <VectorSquare className="w-4 h-4 text-blue-500" />,
77+
value: kb.embedding?.modelName + (kb.embedding?.provider ? ` (${kb.embedding.provider})` : "") || "无",
78+
},
79+
{
80+
label: "文本理解模型",
81+
key: "chatModel",
82+
icon: <BookType className="w-4 h-4 text-blue-500" />,
83+
value: kb.chat?.modelName + (kb.chat?.provider ? ` (${kb.chat.provider})` : "") || "无",
84+
},
85+
] : []),
8486
{
8587
label: "文件数",
8688
key: "fileCount",
8789
icon: <File className="w-4 h-4 text-blue-500" />,
8890
value: formatNumber(kb?.fileCount) || 0,
8991
},
9092
{
91-
label: "大小",
92-
key: "size",
93+
label: "分块数",
94+
key: "chunkCount",
9395
icon: <ChartNoAxesColumn className="w-4 h-4 text-blue-500" />,
94-
value: formatBytes(kb?.size) || "0 MB",
96+
value: formatNumber(kb?.chunkCount) || 0,
9597
},
9698
],
9799
updatedAt: formatDateTime(kb.updatedAt),
@@ -112,4 +114,4 @@ export function mapFileData(file: Partial<KBFile>): KBFile {
112114
color: "#d9d9d9",
113115
},
114116
};
115-
}
117+
}

0 commit comments

Comments
 (0)