Skip to content

Commit 60e2289

Browse files
authored
fix:修复配比任务操作问题 (#66)
* fix:配比任务需要能够跳转到目标数据集 * feature:增加配比任务详情接口 * fix:删除不存在的配比详情页面 * fix:使用正式的逻辑来展示标签 * fix:参数默认值去掉多余的- * fix:修复配比任务相关操作
1 parent 28b7c63 commit 60e2289

File tree

9 files changed

+121
-173
lines changed

9 files changed

+121
-173
lines changed

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFile.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
import com.baomidou.mybatisplus.annotation.TableId;
44
import com.baomidou.mybatisplus.annotation.TableName;
5+
import com.fasterxml.jackson.core.type.TypeReference;
56
import com.fasterxml.jackson.databind.ObjectMapper;
67
import lombok.*;
8+
import lombok.extern.slf4j.Slf4j;
79
import org.apache.commons.lang3.StringUtils;
810

911
import java.time.LocalDateTime;
@@ -17,6 +19,7 @@
1719
@Getter
1820
@Setter
1921
@Builder
22+
@Slf4j
2023
@NoArgsConstructor
2124
@AllArgsConstructor
2225
@TableName("t_dm_dataset_files")
@@ -42,11 +45,12 @@ public class DatasetFile {
4245
*
4346
* @return 标签列表
4447
*/
45-
public List<String> analyzeTag() {
48+
public List<FileTag> analyzeTag() {
4649
try {
4750
ObjectMapper mapper = new ObjectMapper();
48-
return mapper.readValue(tags, List.class);
51+
return mapper.readValue(tags, new TypeReference<List<FileTag>>() {});
4952
} catch (Exception e) {
53+
log.error(e.getMessage(), e);
5054
return Collections.emptyList();
5155
}
5256
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package com.datamate.datamanagement.domain.model.dataset;
2+
3+
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
4+
import com.fasterxml.jackson.databind.PropertyNamingStrategies;
5+
import com.fasterxml.jackson.databind.annotation.JsonNaming;
6+
import lombok.AllArgsConstructor;
7+
import lombok.Getter;
8+
import lombok.NoArgsConstructor;
9+
import lombok.Setter;
10+
import org.apache.commons.lang3.StringUtils;
11+
12+
import java.util.ArrayList;
13+
import java.util.List;
14+
import java.util.Map;
15+
16+
@Getter
17+
@Setter
18+
@NoArgsConstructor
19+
@AllArgsConstructor
20+
@JsonIgnoreProperties(ignoreUnknown = true)
21+
@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
22+
public class FileTag {
23+
private Map<String, Object> value;
24+
25+
private String type;
26+
27+
private String id;
28+
29+
private String fromName;
30+
31+
public List<String> getTags() {
32+
List<String> tags = new ArrayList<>();
33+
Object tagValues = value.get(type);
34+
if (tagValues instanceof List) {
35+
for (Object tag : (List<?>) tagValues) {
36+
if (tag instanceof String) {
37+
tags.add((String) tag);
38+
}
39+
}
40+
} else if (tagValues instanceof String) {
41+
tags.add((String) tagValues);
42+
}
43+
if(StringUtils.isNotEmpty(fromName)) {
44+
return tags.stream().map(tag -> fromName + " " + tag).toList();
45+
}
46+
return tags;
47+
}
48+
}

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/converter/DatasetConverter.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.datamate.common.infrastructure.exception.BusinessException;
44
import com.datamate.common.infrastructure.exception.SystemErrorCode;
5+
import com.datamate.datamanagement.domain.model.dataset.FileTag;
56
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
67
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
78
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
@@ -71,12 +72,12 @@ default Map<String, Long> getDistribution(List<DatasetFile> datasetFiles) {
7172
return distribution;
7273
}
for (DatasetFile datasetFile : datasetFiles) {
    List<FileTag> tags = datasetFile.analyzeTag();
    if (CollectionUtils.isEmpty(tags)) {
        // Skip only this file. Returning here would end the aggregation at the
        // first untagged (or unparsable) file and make the result depend on
        // the order of the files.
        continue;
    }
    for (FileTag tag : tags) {
        // Each FileTag can expand to several tag names; count every occurrence.
        for (String tagName : tag.getTags()) {
            distribution.merge(tagName, 1L, Long::sum);
        }
    }
}
8283
return distribution;

backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@
5151
public class RagEtlService {
5252
private static final Semaphore SEMAPHORE = new Semaphore(10);
5353

54-
@Value("${datamate.rag.milvus-host:-milvus-standalone}")
54+
@Value("${datamate.rag.milvus-host:milvus-standalone}")
5555
private String milvusHost;
56-
@Value("${datamate.rag.milvus-port:-19530}")
56+
@Value("${datamate.rag.milvus-port:19530}")
5757
private int milvusPort;
5858

5959
private final RagFileRepository ragFileRepository;

frontend/src/pages/RatioTask/Create/CreateRatioTask.tsx

Lines changed: 0 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -76,116 +76,6 @@ export default function CreateRatioTask() {
7676
setCreating(false);
7777
}
7878
};
79-
const totalConfigured = useMemo(
80-
() =>
81-
ratioTaskForm?.ratioConfigs?.reduce?.(
82-
(sum, c) => sum + (c.quantity || 0),
83-
0
84-
) || 0,
85-
[ratioTaskForm.ratioConfigs]
86-
);
87-
88-
// dataset selection is handled inside SelectDataset via onSelectedDatasetsChange
89-
90-
const updateRatioConfig = (source: string, quantity: number) => {
91-
setRatioTaskForm((prev) => {
92-
const existingIndex = prev.ratioConfigs.findIndex(
93-
(config) => config.source === source
94-
);
95-
const totalOtherQuantity = prev.ratioConfigs
96-
.filter((config) => config.source !== source)
97-
.reduce((sum, config) => sum + config.quantity, 0);
98-
99-
const newConfig = {
100-
id: source,
101-
name: source,
102-
type: prev.ratioType,
103-
quantity: Math.min(
104-
quantity,
105-
prev.totalTargetCount - totalOtherQuantity
106-
),
107-
percentage: Math.round((quantity / prev.totalTargetCount) * 100),
108-
source,
109-
};
110-
111-
if (existingIndex >= 0) {
112-
const newConfigs = [...prev.ratioConfigs];
113-
newConfigs[existingIndex] = newConfig;
114-
return { ...prev, ratioConfigs: newConfigs };
115-
} else {
116-
return { ...prev, ratioConfigs: [...prev.ratioConfigs, newConfig] };
117-
}
118-
});
119-
};
120-
121-
const generateAutoRatio = () => {
122-
const selectedCount = ratioTaskForm.selectedDatasets.length;
123-
if (selectedCount === 0) return;
124-
125-
const baseQuantity = Math.floor(
126-
ratioTaskForm.totalTargetCount / selectedCount
127-
);
128-
const remainder = ratioTaskForm.totalTargetCount % selectedCount;
129-
130-
const newConfigs = ratioTaskForm.selectedDatasets.map(
131-
(datasetId, index) => {
132-
const quantity = baseQuantity + (index < remainder ? 1 : 0);
133-
return {
134-
id: datasetId,
135-
name: datasetId,
136-
type: ratioTaskForm.ratioType,
137-
quantity,
138-
percentage: Math.round(
139-
(quantity / ratioTaskForm.totalTargetCount) * 100
140-
),
141-
source: datasetId,
142-
};
143-
}
144-
);
145-
146-
setRatioTaskForm((prev) => ({ ...prev, ratioConfigs: newConfigs }));
147-
};
148-
149-
// 标签模式下,更新某数据集的某个标签的数量
150-
const updateLabelRatioConfig = (
151-
datasetId: string,
152-
label: string,
153-
quantity: number
154-
) => {
155-
const sourceKey = `${datasetId}_${label}`;
156-
setRatioTaskForm((prev) => {
157-
const existingIndex = prev.ratioConfigs.findIndex(
158-
(c) => c.source === sourceKey
159-
);
160-
const totalOtherQuantity = prev.ratioConfigs
161-
.filter((c) => c.source !== sourceKey)
162-
.reduce((sum, c) => sum + c.quantity, 0);
163-
164-
const dist = distributions[datasetId] || {};
165-
const labelMax = dist[label] ?? Infinity;
166-
const cappedQuantity = Math.max(
167-
0,
168-
Math.min(quantity, prev.totalTargetCount - totalOtherQuantity, labelMax)
169-
);
170-
171-
const newConfig = {
172-
id: sourceKey,
173-
name: label,
174-
type: "label",
175-
quantity: cappedQuantity,
176-
percentage: Math.round((cappedQuantity / prev.totalTargetCount) * 100),
177-
source: sourceKey,
178-
};
179-
180-
if (existingIndex >= 0) {
181-
const newConfigs = [...prev.ratioConfigs];
182-
newConfigs[existingIndex] = newConfig;
183-
return { ...prev, ratioConfigs: newConfigs };
184-
} else {
185-
return { ...prev, ratioConfigs: [...prev.ratioConfigs, newConfig] };
186-
}
187-
});
188-
};
18979

19080
const handleValuesChange = (_, allValues) => {
19181
setRatioTaskForm({ ...ratioTaskForm, ...allValues });

frontend/src/pages/RatioTask/Home/RatioTask.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ export default function RatioTasksPage() {
123123
<Button
124124
type="text"
125125
icon={op.icon}
126-
onClick={() => op.onClick(task.id)}
126+
onClick={() => op.onClick(task)}
127127
/>
128128
</Tooltip>
129129
))}

frontend/src/pages/RatioTask/ratio.api.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ export function createRatioTaskUsingPost(data: any) {
1616
}
1717

// Delete ratio task(s). Accepts a single id or an array of ids (batch delete).
export function deleteRatioTasksUsingDelete(id: string | string[]) {
  // Normalize to an array and drop empty entries so that neither "ids=" nor
  // "ids=undefined" is ever sent.
  const ids = (Array.isArray(id) ? id : [id]).filter(Boolean);
  // Encode every id: reserved characters (&, =, #, spaces) must not be able to
  // corrupt or extend the query string.
  const qs = ids.map((item) => `ids=${encodeURIComponent(item)}`).join("&");
  const url = qs ? `/api/synthesis/ratio-task?${qs}` : "/api/synthesis/ratio-task";
  return del(url);
}

runtime/datamate-python/app/module/dataset/schema/dataset_file.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,27 @@ class PagedDatasetFileResponse(BaseModel):
2525
totalPages: int = Field(..., description="总页数")
2626
page: int = Field(..., description="当前页码")
2727
size: int = Field(..., description="每页大小")
28-
28+
class DatasetFileTag(BaseModel):
    """A single tag entry attached to a dataset file.

    ``value`` is a dict whose entry under the key ``type`` holds the actual
    tag value(s); ``from_name`` is used as a prefix for every tag string.
    """

    id: str = Field(..., description="标签ID")
    type: str = Field(..., description="类型")
    from_name: str = Field(..., description="标签名称")
    value: dict = Field(..., description="标签值")

    def get_tags(self) -> List[str]:
        """Return the tag strings carried by this entry.

        The object stored in ``value`` under ``type`` may be a list (only its
        string elements are kept) or a single string; any other shape yields
        no tags. When ``from_name`` is non-empty each tag is returned as
        ``"<from_name> <tag>"``.
        """
        raw = self.value.get(self.type, [])

        if isinstance(raw, list):
            # Non-string list elements are ignored.
            names = [str(item) for item in raw if isinstance(item, str)]
        elif isinstance(raw, str):
            names = [raw]
        else:
            names = []

        if not self.from_name:
            return names
        return [f"{self.from_name} {name}" for name in names]

runtime/datamate-python/app/module/synthesis/service/ratio_task.py

Lines changed: 33 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from typing import List, Optional, Dict, Any
22
import random
3+
import json
34
import os
45
import shutil
56
import asyncio
@@ -12,6 +13,7 @@
1213
from app.db.models.ratio_task import RatioInstance, RatioRelation
1314
from app.db.models import Dataset, DatasetFiles
1415
from app.db.session import AsyncSessionLocal
16+
from app.module.dataset.schema.dataset_file import DatasetFileTag
1517

1618
logger = get_logger(__name__)
1719

@@ -218,65 +220,46 @@ def _parse_required_tags(conditions: Optional[str]) -> set[str]:
218220
"""
219221
if not conditions:
220222
return set()
221-
raw = conditions.replace("\n", " ")
222-
seps = [",", ";", " "]
223-
tokens = [raw]
224-
for sep in seps:
225-
nxt = []
226-
for t in tokens:
227-
nxt.extend(t.split(sep))
228-
tokens = nxt
229-
return {t.strip() for t in tokens if t and t.strip()}
223+
data = json.loads(conditions)
224+
required_tags = set()
225+
if data.get("label"):
226+
required_tags.add(data["label"])
227+
return required_tags
230228

@staticmethod
def _file_contains_tags(file: DatasetFiles, required: set[str]) -> bool:
    """Check whether ``file`` carries every tag in ``required``.

    Returns True when ``required`` is empty. Returns False when the file has
    no tags, when a required tag is missing, or when the stored tags cannot
    be interpreted (the failure is logged and treated as "no match").
    """
    if not required:
        return True
    tags = file.tags
    if not tags:
        return False
    try:
        # Tag names are derived from the stored tag entries by get_all_tags.
        tag_names = RatioTaskService.get_all_tags(tags)
        return required.issubset(tag_names)
    except Exception:
        # logger.exception already records the active exception and traceback.
        # Passing the exception as an extra positional argument to a message
        # without placeholders breaks message formatting in stdlib logging.
        logger.exception(f"Failed to get tags for {file}")
        return False
258243

259244
@staticmethod
260-
async def get_new_file(f, rel: RatioRelation, target_ds: Dataset) -> DatasetFiles:
261-
new_path = f.file_path
262-
src_prefix = f"/dataset/{rel.source_dataset_id}"
263-
if isinstance(f.file_path, str) and f.file_path.startswith(src_prefix):
264-
dst_prefix = f"/dataset/{target_ds.id}"
265-
new_path = f.file_path.replace(src_prefix, dst_prefix, 1)
266-
dst_dir = os.path.dirname(new_path)
267-
# Ensure directory and copy the file in a thread to avoid blocking the event loop
268-
await asyncio.to_thread(os.makedirs, dst_dir, exist_ok=True)
269-
await asyncio.to_thread(shutil.copy2, f.file_path, new_path)
270-
271-
new_file = DatasetFiles(
272-
dataset_id=target_ds.id, # type: ignore
273-
file_name=f.file_name,
274-
file_path=new_path,
275-
file_type=f.file_type,
276-
file_size=f.file_size,
277-
check_sum=f.check_sum,
278-
tags=f.tags,
279-
dataset_filemetadata=f.dataset_filemetadata,
280-
status="ACTIVE",
281-
)
282-
return new_file
def get_all_tags(tags) -> set[str]:
    """Collect the distinct tag strings produced by every entry in ``tags``.

    Each entry is expected to be a mapping whose keys match the fields of
    ``DatasetFileTag``; keys are passed through unchanged (no naming-style
    conversion is performed). An empty or missing ``tags`` yields an empty set.
    """
    if not tags:
        return set()

    # Build every DatasetFileTag first so that a malformed entry fails before
    # any tag strings are collected.
    parsed = [DatasetFileTag(**dict(entry.items())) for entry in tags]

    return {name for file_tag in parsed for name in file_tag.get_tags()}

0 commit comments

Comments
 (0)