Skip to content

Commit 08bd4ec

Browse files
authored
feature:增加数据配比功能 (#52)
* refactor: 修改调整数据归集实现,删除无用代码,优化代码结构
* feature: 每天凌晨00:00扫描所有数据集,检查数据集是否超过了预设的保留天数,超出保留天数的数据集调用删除接口进行删除
* fix: 修改删除数据集文件的逻辑,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
* fix: 增加参数校验和接口定义,删除不使用的接口
* fix: 数据集统计数据默认为0
* feature: 数据集状态增加流转,创建时为草稿状态,上传文件或者归集文件后修改为活动状态
* refactor: 修改分页查询归集任务的代码
* fix: 更新后重新执行;归集任务执行增加事务控制
* feature: 创建归集任务时能够同步创建数据集,更新归集任务时能更新到指定数据集
* fix: 创建归集任务不需要创建数据集时不应该报错
* fix: 修复删除文件时数据集的统计数据不变动
* feature: 查询数据集详情时能够获取到文件标签分布
* fix: tags为空时不进行分析
* fix: 状态修改为ACTIVE
* fix: 修改解析tag的方法
* feature: 实现创建、分页查询、删除配比任务
* feature: 实现创建、分页查询、删除配比任务的前端交互
* fix: 修复进度计算异常导致的页面报错
1 parent 07edf16 commit 08bd4ec

File tree

32 files changed

+1894
-1028
lines changed

32 files changed

+1894
-1028
lines changed

backend/services/data-collection-service/src/main/java/com/datamate/collection/interfaces/rest/CollectionTaskController.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import com.datamate.datamanagement.application.DatasetApplicationService;
1111
import com.datamate.datamanagement.domain.model.dataset.Dataset;
1212
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
13+
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
1314
import jakarta.validation.Valid;
1415
import lombok.RequiredArgsConstructor;
1516
import lombok.extern.slf4j.Slf4j;
@@ -35,11 +36,13 @@ public class CollectionTaskController{
3536
public ResponseEntity<CollectionTaskResponse> createTask(@Valid @RequestBody CreateCollectionTaskRequest request) {
3637
CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
3738
String datasetId = null;
39+
DatasetResponse dataset = null;
3840
if (Objects.nonNull(request.getDataset())) {
39-
datasetId = datasetService.createDataset(request.getDataset()).getId();
41+
dataset = DatasetConverter.INSTANCE.convertToResponse(datasetService.createDataset(request.getDataset()));
42+
datasetId = dataset.getId();
4043
}
4144
CollectionTaskResponse response = CollectionTaskConverter.INSTANCE.toResponse(taskService.create(task, datasetId));
42-
response.setDataset(DatasetConverter.INSTANCE.convertToResponse(datasetService.getDataset(datasetId)));
45+
response.setDataset(dataset);
4346
return ResponseEntity.ok().body(response);
4447
}
4548

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetApplicationService.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ public void deleteDataset(String datasetId) {
119119
public Dataset getDataset(String datasetId) {
120120
Dataset dataset = datasetRepository.getById(datasetId);
121121
BusinessAssert.notNull(dataset, DataManagementErrorCode.DATASET_NOT_FOUND);
122+
List<DatasetFile> datasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
123+
dataset.setFiles(datasetFiles);
122124
return dataset;
123125
}
124126

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ public DatasetFile getDatasetFile(String datasetId, String fileId) {
102102
public void deleteDatasetFile(String datasetId, String fileId) {
103103
DatasetFile file = getDatasetFile(datasetId, fileId);
104104
Dataset dataset = datasetRepository.getById(datasetId);
105+
dataset.setFiles(new ArrayList<>(Collections.singleton(file)));
106+
datasetFileRepository.removeById(fileId);
107+
dataset.removeFile(file);
108+
datasetRepository.updateById(dataset);
105109
// 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
106110
if (file.getFilePath().startsWith(dataset.getPath())) {
107111
try {
@@ -111,9 +115,6 @@ public void deleteDatasetFile(String datasetId, String fileId) {
111115
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
112116
}
113117
}
114-
datasetFileRepository.removeById(fileId);
115-
dataset.removeFile(file);
116-
datasetRepository.updateById(dataset);
117118
}
118119

119120
/**

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/FileMetadataService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ private DatasetFile extractFileMetadata(String filePath, String datasetId) throw
110110
.fileType(fileType)
111111
.uploadTime(LocalDateTime.now())
112112
.lastAccessTime(LocalDateTime.now())
113-
.status("UPLOADED")
113+
.status("ACTIVE")
114114
.build();
115115
}
116116

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFile.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22

33
import com.baomidou.mybatisplus.annotation.TableId;
44
import com.baomidou.mybatisplus.annotation.TableName;
5+
import com.fasterxml.jackson.databind.ObjectMapper;
56
import lombok.*;
7+
import org.apache.commons.lang3.StringUtils;
68

79
import java.time.LocalDateTime;
10+
import java.util.Arrays;
11+
import java.util.Collections;
812
import java.util.List;
913

1014
/**
@@ -25,11 +29,25 @@ public class DatasetFile {
2529
private String fileType; // JPG/PNG/DCM/TXT
2630
private Long fileSize; // bytes
2731
private String checkSum;
28-
private List<String> tags;
32+
private String tags;
2933
private String metadata;
3034
private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR
3135
private LocalDateTime uploadTime;
3236
private LocalDateTime lastAccessTime;
3337
private LocalDateTime createdAt;
3438
private LocalDateTime updatedAt;
39+
40+
/**
41+
* 解析标签
42+
*
43+
* @return 标签列表
44+
*/
45+
public List<String> analyzeTag() {
46+
try {
47+
ObjectMapper mapper = new ObjectMapper();
48+
return mapper.readValue(tags, List.class);
49+
} catch (Exception e) {
50+
return Collections.emptyList();
51+
}
52+
}
3553
}

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/StatusConstants.java

Lines changed: 0 additions & 33 deletions
This file was deleted.

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/converter/DatasetConverter.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,24 @@
11
package com.datamate.datamanagement.interfaces.converter;
22

3+
import com.datamate.common.infrastructure.exception.BusinessException;
4+
import com.datamate.common.infrastructure.exception.SystemErrorCode;
35
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
46
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
57
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
68
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
79
import com.datamate.common.domain.model.ChunkUploadRequest;
810
import com.datamate.datamanagement.domain.model.dataset.Dataset;
911
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
12+
import com.fasterxml.jackson.databind.ObjectMapper;
13+
import org.apache.commons.collections4.CollectionUtils;
1014
import org.mapstruct.Mapper;
1115
import org.mapstruct.Mapping;
16+
import org.mapstruct.Named;
1217
import org.mapstruct.factory.Mappers;
1318

19+
import java.util.HashMap;
1420
import java.util.List;
21+
import java.util.Map;
1522

1623
/**
1724
* 数据集文件转换器
@@ -26,6 +33,7 @@ public interface DatasetConverter {
2633
*/
2734
@Mapping(source = "sizeBytes", target = "totalSize")
2835
@Mapping(source = "path", target = "targetLocation")
36+
@Mapping(source = "files", target = "distribution", qualifiedByName = "getDistribution")
2937
DatasetResponse convertToResponse(Dataset dataset);
3038

3139
/**
@@ -49,4 +57,28 @@ public interface DatasetConverter {
4957
* 将数据集文件转换为响应
5058
*/
5159
DatasetFileResponse convertToResponse(DatasetFile datasetFile);
60+
61+
/**
62+
* 获取数据文件的标签分布
63+
*
64+
* @param datasetFiles 数据集文件
65+
* @return 标签分布
66+
*/
67+
@Named("getDistribution")
68+
default Map<String, Long> getDistribution(List<DatasetFile> datasetFiles) {
69+
Map<String, Long> distribution = new HashMap<>();
70+
if (CollectionUtils.isEmpty(datasetFiles)) {
71+
return distribution;
72+
}
73+
for (DatasetFile datasetFile : datasetFiles) {
74+
List<String> tags = datasetFile.analyzeTag();
75+
if (CollectionUtils.isEmpty(tags)) {
76+
continue;
77+
}
78+
for (String tag : tags) {
79+
distribution.put(tag, distribution.getOrDefault(tag, 0L) + 1);
80+
}
81+
}
82+
return distribution;
83+
}
5284
}

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetResponse.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import java.time.LocalDateTime;
77
import java.util.List;
8+
import java.util.Map;
89

910
/**
1011
* 数据集响应DTO
@@ -42,8 +43,8 @@ public class DatasetResponse {
4243
private LocalDateTime updatedAt;
4344
/** 创建者 */
4445
private String createdBy;
45-
/**
46-
* 更新者
47-
*/
46+
/** 更新者 */
4847
private String updatedBy;
48+
/** 分布 */
49+
private Map<String, Long> distribution ;
4950
}

frontend/src/mock/ratio.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import type { RatioTask } from "@/pages/RatioTask/ratio";
1+
import type { RatioTask } from "@/pages/RatioTask/ratio.model.ts";
22

33
export const mockRatioTasks: RatioTask[] = [
44
{

frontend/src/pages/DataManagement/dataset.model.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,13 @@ export interface Dataset {
4949
status: DatasetStatus;
5050
size?: string;
5151
itemCount?: number;
52+
fileCount?: number;
5253
createdBy: string;
5354
createdAt: string;
5455
updatedAt: string;
5556
tags: string[];
5657
targetLocation?: string;
58+
distribution?: Record<string, number>;
5759
}
5860

5961
export interface TagItem {

0 commit comments

Comments
 (0)