Skip to content

Commit 99f9cf4

Browse files
authored
Merge pull request #9 from ModelEngine-Group/develop_930_gfl
feature:增加本地归集功能 (feature: add local data-collection/aggregation capability)
2 parents 32e15ad + 6912f93 commit 99f9cf4

File tree

10 files changed

+337
-21
lines changed

10 files changed

+337
-21
lines changed

backend/api-gateway/src/main/java/com/dataengine/gateway/ApiGatewayApplication.java

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,48 +26,52 @@ public static void main(String[] args) {
2626
@Bean
2727
public RouteLocator customRouteLocator(RouteLocatorBuilder builder) {
2828
return builder.routes()
29+
// 数据归集服务路由
30+
.route("data-collection", r -> r.path("/api/data-collection/**")
31+
.uri("lb://data-collection-service"))
32+
2933
// 数据管理服务路由
3034
.route("data-management", r -> r.path("/api/data-management/**")
3135
.uri("lb://data-management-service"))
32-
36+
3337
// 算子市场服务路由
3438
.route("operator-market", r -> r.path("/api/operators/**")
3539
.uri("lb://operator-market-service"))
36-
40+
3741
// 数据清洗服务路由
3842
.route("data-cleaning", r -> r.path("/api/cleaning/**")
3943
.uri("lb://data-cleaning-service"))
40-
44+
4145
// 数据合成服务路由
4246
.route("data-synthesis", r -> r.path("/api/synthesis/**")
4347
.uri("lb://data-synthesis-service"))
44-
48+
4549
// 数据标注服务路由
4650
.route("data-annotation", r -> r.path("/api/annotation/**")
4751
.uri("lb://data-annotation-service"))
48-
52+
4953
// 数据评估服务路由
5054
.route("data-evaluation", r -> r.path("/api/evaluation/**")
5155
.uri("lb://data-evaluation-service"))
52-
56+
5357
// 流程编排服务路由
5458
.route("pipeline-orchestration", r -> r.path("/api/pipelines/**")
5559
.uri("lb://pipeline-orchestration-service"))
56-
60+
5761
// 执行引擎服务路由
5862
.route("execution-engine", r -> r.path("/api/execution/**")
5963
.uri("lb://execution-engine-service"))
60-
64+
6165
// 认证服务路由
6266
.route("auth-service", r -> r.path("/api/auth/**")
6367
.uri("lb://auth-service"))
64-
68+
6569
// RAG服务路由
6670
.route("rag-indexer", r -> r.path("/api/rag/indexer/**")
6771
.uri("lb://rag-indexer-service"))
6872
.route("rag-query", r -> r.path("/api/rag/query/**")
6973
.uri("lb://rag-query-service"))
70-
74+
7175
.build();
7276
}
7377
}

backend/openapi/specs/data-collection.yaml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ tags:
2323
description: 任务执行管理
2424

2525
paths:
26-
/tasks:
26+
/data-collection/tasks:
2727
get:
28+
operationId: getTasks
2829
tags: [CollectionTask]
2930
summary: 获取归集任务列表
3031
parameters:
@@ -56,6 +57,7 @@ paths:
5657
$ref: '#/components/schemas/PagedCollectionTaskSummary'
5758

5859
post:
60+
operationId: createTask
5961
tags: [CollectionTask]
6062
summary: 创建归集任务
6163
description: 创建新的数据归集任务
@@ -73,8 +75,9 @@ paths:
7375
schema:
7476
$ref: '#/components/schemas/CollectionTaskResponse'
7577

76-
/tasks/{id}:
78+
/data-collection/tasks/{id}:
7779
get:
80+
operationId: getTaskDetail
7881
tags: [CollectionTask]
7982
summary: 获取归集任务详情
8083
parameters:
@@ -94,6 +97,7 @@ paths:
9497
description: 归集任务不存在
9598

9699
put:
100+
operationId: updateTask
97101
tags: [CollectionTask]
98102
summary: 更新归集任务
99103
parameters:
@@ -117,6 +121,7 @@ paths:
117121
$ref: '#/components/schemas/CollectionTaskResponse'
118122

119123
delete:
124+
operationId: deleteTask
120125
tags: [CollectionTask]
121126
summary: 删除归集任务
122127
parameters:

backend/services/data-collection-service/src/main/java/com/dataengine/collection/interfaces/facade/CollectionTaskController.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ private String mapToJsonString(Map<String, Object> map) {
8282
}
8383

8484
@Override
85-
public ResponseEntity<CollectionTaskResponse> tasksPost(CreateCollectionTaskRequest body) {
85+
public ResponseEntity<CollectionTaskResponse> createTask(CreateCollectionTaskRequest body) {
8686
CollectionTask t = new CollectionTask();
8787
t.setName(body.getName());
8888
t.setDescription(body.getDescription());
@@ -94,7 +94,7 @@ public ResponseEntity<CollectionTaskResponse> tasksPost(CreateCollectionTaskRequ
9494
}
9595

9696
@Override
97-
public ResponseEntity<CollectionTaskResponse> tasksIdPut(String id, UpdateCollectionTaskRequest body) {
97+
public ResponseEntity<CollectionTaskResponse> updateTask(String id, UpdateCollectionTaskRequest body) {
9898
CollectionTask t = taskService.get(id);
9999
if (t == null) {
100100
return ResponseEntity.notFound().build();
@@ -110,19 +110,19 @@ public ResponseEntity<CollectionTaskResponse> tasksIdPut(String id, UpdateCollec
110110
}
111111

112112
@Override
113-
public ResponseEntity<Void> tasksIdDelete(String id) {
113+
public ResponseEntity<Void> deleteTask(String id) {
114114
taskService.delete(id);
115115
return ResponseEntity.noContent().build();
116116
}
117117

118118
@Override
119-
public ResponseEntity<CollectionTaskResponse> tasksIdGet(String id) {
119+
public ResponseEntity<CollectionTaskResponse> getTaskDetail(String id) {
120120
CollectionTask t = taskService.get(id);
121121
return t == null ? ResponseEntity.notFound().build() : ResponseEntity.ok(toResponse(t));
122122
}
123123

124124
@Override
125-
public ResponseEntity<PagedCollectionTaskSummary> tasksGet(Integer page, Integer size, TaskStatus status, String name) {
125+
public ResponseEntity<PagedCollectionTaskSummary> getTasks(Integer page, Integer size, TaskStatus status, String name) {
126126
var list = taskService.list(page, size, status == null ? null : status.getValue(), name);
127127
PagedCollectionTaskSummary p = new PagedCollectionTaskSummary();
128128
p.setContent(list.stream().map(this::toSummary).collect(Collectors.toList()));

backend/services/data-management-service/src/main/java/com/dataengine/datamanagement/DataManagementServiceConfiguration.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
package com.dataengine.datamanagement;
22

3+
import org.springframework.cloud.openfeign.EnableFeignClients;
34
import org.springframework.context.annotation.ComponentScan;
45
import org.springframework.context.annotation.Configuration;
6+
import org.springframework.scheduling.annotation.EnableAsync;
57

68
/**
79
* Data Management Service Configuration
810
* 数据管理服务配置类 - 多源接入、元数据、血缘治理
911
*/
1012
@Configuration
13+
@EnableFeignClients(basePackages = "com.dataengine.datamanagement.infrastructure.client")
14+
@EnableAsync
1115
@ComponentScan(basePackages = {
1216
"com.dataengine.datamanagement",
1317
"com.dataengine.shared"

backend/services/data-management-service/src/main/java/com/dataengine/datamanagement/application/service/DatasetApplicationService.java

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,29 @@
11
package com.dataengine.datamanagement.application.service;
22

33
import com.dataengine.datamanagement.domain.model.dataset.Dataset;
4+
import com.dataengine.datamanagement.domain.model.dataset.DatasetFile;
45
import com.dataengine.datamanagement.domain.model.dataset.Tag;
6+
import com.dataengine.datamanagement.infrastructure.client.CollectionTaskClient;
7+
import com.dataengine.datamanagement.infrastructure.client.dto.CollectionTaskDetailResponse;
8+
import com.dataengine.datamanagement.infrastructure.client.dto.LocalCollectionConfig;
59
import com.dataengine.datamanagement.infrastructure.persistence.mapper.DatasetFileMapper;
610
import com.dataengine.datamanagement.infrastructure.persistence.mapper.DatasetMapper;
711
import com.dataengine.datamanagement.infrastructure.persistence.mapper.TagMapper;
812
import com.dataengine.datamanagement.interfaces.converter.DatasetConverter;
913
import com.dataengine.datamanagement.interfaces.dto.AllDatasetStatisticsResponse;
1014
import com.dataengine.datamanagement.interfaces.dto.CreateDatasetRequest;
1115
import com.dataengine.datamanagement.interfaces.dto.DatasetPagingQuery;
16+
import com.fasterxml.jackson.databind.ObjectMapper;
17+
import lombok.extern.slf4j.Slf4j;
1218
import org.apache.commons.collections4.CollectionUtils;
19+
import org.apache.commons.lang3.StringUtils;
1320
import org.apache.ibatis.session.RowBounds;
1421
import org.springframework.beans.factory.annotation.Autowired;
1522
import org.springframework.beans.factory.annotation.Value;
1623
import org.springframework.data.domain.Page;
1724
import org.springframework.data.domain.PageImpl;
1825
import org.springframework.data.domain.PageRequest;
26+
import org.springframework.scheduling.annotation.Async;
1927
import org.springframework.stereotype.Service;
2028
import org.springframework.transaction.annotation.Transactional;
2129

@@ -25,22 +33,34 @@
2533
/**
2634
* 数据集应用服务(对齐 DB schema,使用 UUID 字符串主键)
2735
*/
36+
@Slf4j
2837
@Service
2938
@Transactional
3039
public class DatasetApplicationService {
3140

3241
private final DatasetMapper datasetMapper;
3342
private final TagMapper tagMapper;
3443
private final DatasetFileMapper datasetFileMapper;
44+
private final CollectionTaskClient collectionTaskClient;
45+
private final FileMetadataService fileMetadataService;
46+
private final ObjectMapper objectMapper;
3547

3648
@Value("${dataset.base.path:/dataset}")
3749
private String datasetBasePath;
3850

3951
@Autowired
40-
public DatasetApplicationService(DatasetMapper datasetMapper, TagMapper tagMapper, DatasetFileMapper datasetFileMapper) {
52+
public DatasetApplicationService(DatasetMapper datasetMapper,
53+
TagMapper tagMapper,
54+
DatasetFileMapper datasetFileMapper,
55+
CollectionTaskClient collectionTaskClient,
56+
FileMetadataService fileMetadataService,
57+
ObjectMapper objectMapper) {
4158
this.datasetMapper = datasetMapper;
4259
this.tagMapper = tagMapper;
4360
this.datasetFileMapper = datasetFileMapper;
61+
this.collectionTaskClient = collectionTaskClient;
62+
this.fileMetadataService = fileMetadataService;
63+
this.objectMapper = objectMapper;
4464
}
4565

4666
/**
@@ -66,6 +86,11 @@ public Dataset createDataset(CreateDatasetRequest createDatasetRequest) {
6686
}
6787
}
6888

89+
if (StringUtils.isNotBlank(createDatasetRequest.getDataSource())) {
90+
// 数据源id不为空,使用异步线程进行文件扫盘落库
91+
processDataSourceAsync(dataset.getId(), createDatasetRequest.getDataSource());
92+
}
93+
6994
// 返回创建的数据集,包含标签信息
7095
Dataset createdDataset = datasetMapper.findById(dataset.getId());
7196
createdDataset.getTags().addAll(processedTags);
@@ -241,4 +266,78 @@ public Map<String, Object> getDatasetStatistics(String datasetId) {
241266
public AllDatasetStatisticsResponse getAllDatasetStatistics() {
242267
return datasetMapper.getAllDatasetStatistics();
243268
}
269+
270+
/**
271+
* 异步处理数据源文件扫描
272+
* @param datasetId 数据集ID
273+
* @param dataSourceId 数据源ID(归集任务ID)
274+
*/
275+
@Async
276+
public void processDataSourceAsync(String datasetId, String dataSourceId) {
277+
try {
278+
log.info("开始处理数据源文件扫描,数据集ID: {}, 数据源ID: {}", datasetId, dataSourceId);
279+
280+
// 1. 调用数据归集服务获取任务详情
281+
CollectionTaskDetailResponse taskDetail = collectionTaskClient.getTaskDetail(dataSourceId);
282+
if (taskDetail == null) {
283+
log.error("获取归集任务详情失败,任务ID: {}", dataSourceId);
284+
return;
285+
}
286+
287+
log.info("获取到归集任务详情: {}", taskDetail.getName());
288+
289+
// 2. 解析任务配置
290+
LocalCollectionConfig config = parseTaskConfig(taskDetail.getConfig());
291+
if (config == null) {
292+
log.error("解析任务配置失败,任务ID: {}", dataSourceId);
293+
return;
294+
}
295+
296+
// 3. 检查任务类型是否为 LOCAL_COLLECTION
297+
if (!"LOCAL_COLLECTION".equalsIgnoreCase(config.getType())) {
298+
log.info("任务类型不是 LOCAL_COLLECTION,跳过文件扫描。任务类型: {}", config.getType());
299+
return;
300+
}
301+
302+
// 4. 获取文件路径列表
303+
List<String> filePaths = config.getFilePaths();
304+
if (CollectionUtils.isEmpty(filePaths)) {
305+
log.warn("文件路径列表为空,任务ID: {}", dataSourceId);
306+
return;
307+
}
308+
309+
log.info("开始扫描文件,共 {} 个文件路径", filePaths.size());
310+
311+
// 5. 扫描文件元数据
312+
List<DatasetFile> datasetFiles = fileMetadataService.scanFiles(filePaths, datasetId);
313+
314+
// 6. 批量插入数据集文件表
315+
if (CollectionUtils.isNotEmpty(datasetFiles)) {
316+
for (DatasetFile datasetFile : datasetFiles) {
317+
datasetFileMapper.insert(datasetFile);
318+
}
319+
log.info("文件元数据写入完成,共写入 {} 条记录", datasetFiles.size());
320+
} else {
321+
log.warn("未扫描到有效文件");
322+
}
323+
324+
} catch (Exception e) {
325+
log.error("处理数据源文件扫描失败,数据集ID: {}, 数据源ID: {}", datasetId, dataSourceId, e);
326+
}
327+
}
328+
329+
/**
330+
* 解析任务配置
331+
*/
332+
private LocalCollectionConfig parseTaskConfig(Map<String, Object> configMap) {
333+
try {
334+
if (configMap == null || configMap.isEmpty()) {
335+
return null;
336+
}
337+
return objectMapper.convertValue(configMap, LocalCollectionConfig.class);
338+
} catch (Exception e) {
339+
log.error("解析任务配置失败", e);
340+
return null;
341+
}
342+
}
244343
}

backend/services/data-management-service/src/main/java/com/dataengine/datamanagement/application/service/DatasetFileApplicationService.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ public DatasetFile uploadFile(String datasetId, MultipartFile file, String descr
9292
datasetFile.setDatasetId(datasetId);
9393
datasetFile.setFileName(fileName);
9494
datasetFile.setFilePath(targetLocation.toString());
95-
datasetFile.setFileType(file.getContentType());
96-
datasetFile.setFileFormat(getFileExtension(originalFilename));
95+
datasetFile.setFileType(getFileExtension(originalFilename));
96+
datasetFile.setFileFormat(file.getContentType());
9797
datasetFile.setFileSize(file.getSize());
9898
datasetFile.setUploadTime(LocalDateTime.now());
9999
datasetFile.setStatus(StatusConstants.DatasetFileStatuses.COMPLETED);
@@ -241,7 +241,6 @@ private void saveFileInfoToDb(FileUploadResult fileUploadResult, UploadFileReque
241241
.fileSize(savedFile.length())
242242
.uploadTime(currentTime)
243243
.lastAccessTime(currentTime)
244-
.lastAccessTime(currentTime)
245244
.fileName(uploadFile.getFileName())
246245
.filePath(savedFile.getPath())
247246
.fileType(AnalyzerUtils.getExtension(uploadFile.getFileName()))

0 commit comments

Comments (0)