Skip to content

Commit a15a613

Browse files
authored
fix the ratio task config (#224)
* fix: fix the dataset card icon
* fix: fix the dataset file tag distribution and ratio task
* refactor: change dateRange config from latest to start-end
1 parent 3f1ad6a commit a15a613

File tree

6 files changed

+44
-37
lines changed

6 files changed

+44
-37
lines changed

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/converter/DatasetConverter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ default Map<String, Map<String, Long>> getDistribution(List<DatasetFile> dataset
7878
for (DatasetFile datasetFile : datasetFiles) {
7979
List<FileTag> tags = datasetFile.analyzeTag();
8080
if (CollectionUtils.isEmpty(tags)) {
81-
return distribution;
81+
continue;
8282
}
8383
for (FileTag tag : tags) {
8484
Map<String, Long> tagValueMap = distribution.getOrDefault(tag.getFromName(), new HashMap<>());

frontend/src/pages/DataManagement/dataset.const.tsx

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import {
1717
FileImage,
1818
FileText,
1919
Video,
20+
Film,
2021
FileCode,
2122
MessageCircleMore,
2223
ImagePlus,
@@ -45,7 +46,7 @@ export const datasetTypeMap: Record<
4546
label: "文本",
4647
order: 1,
4748
icon: ScanText,
48-
iconColor: "blue",
49+
iconColor: "#A78BFA",
4950
children: [
5051
DatasetSubType.TEXT_DOCUMENT,
5152
DatasetSubType.TEXT_WEB,
@@ -58,7 +59,7 @@ export const datasetTypeMap: Record<
5859
label: "图像",
5960
order: 2,
6061
icon: Image,
61-
iconColor: "green",
62+
iconColor: "#38BDF8",
6263
children: [DatasetSubType.IMAGE_IMAGE, DatasetSubType.IMAGE_CAPTION],
6364
description: "用于处理和分析图像数据的数据集",
6465
},
@@ -67,16 +68,16 @@ export const datasetTypeMap: Record<
6768
label: "音频",
6869
order: 3,
6970
icon: Music,
70-
iconColor: "orange",
71+
iconColor: "#F59E0B",
7172
children: [DatasetSubType.AUDIO_AUDIO, DatasetSubType.AUDIO_JSONL],
7273
description: "用于处理和分析音频数据的数据集",
7374
},
7475
[DatasetType.VIDEO]: {
7576
value: DatasetType.VIDEO,
7677
label: "视频",
7778
order: 3,
78-
icon: Video,
79-
iconColor: "purple",
79+
icon: Film,
80+
iconColor: "#22D3EE",
8081
children: [DatasetSubType.VIDEO_VIDEO, DatasetSubType.VIDEO_JSONL],
8182
description: "用于处理和分析视频数据的数据集",
8283
},
@@ -206,6 +207,7 @@ export function mapDataset(dataset: AnyObject): Dataset {
206207
createdAt: formatDateTime(dataset.createdAt) || "--",
207208
updatedAt: formatDateTime(dataset?.updatedAt) || "--",
208209
icon: IconComponent ? <IconComponent className="w-full h-full" /> : <Database />,
210+
iconColor: iconColor,
209211
status: datasetStatusMap[dataset.status],
210212
statistics: [
211213
{ label: "文件数", value: dataset.fileCount || 0 },

frontend/src/pages/RatioTask/Create/CreateRatioTask.tsx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { useNavigate } from "react-router";
77
import SelectDataset from "@/pages/RatioTask/Create/components/SelectDataset.tsx";
88
import BasicInformation from "@/pages/RatioTask/Create/components/BasicInformation.tsx";
99
import RatioConfig from "@/pages/RatioTask/Create/components/RatioConfig.tsx";
10+
import {formatDate} from "@/utils/unit.ts";
1011

1112
export default function CreateRatioTask() {
1213
const navigate = useNavigate();
@@ -37,10 +38,11 @@ export default function CreateRatioTask() {
3738
}
3839
const totals = String(values.totalTargetCount);
3940
const config = ratioTaskForm.ratioConfigs.map((c) => {
41+
const dateRange = c.dateRange ? [formatDate(c.dateRange[0]), formatDate(c.dateRange[1])] : []
4042
return {
4143
datasetId: c.source,
4244
counts: String(c.quantity ?? 0),
43-
filterConditions: { label: c.labelFilter, dateRange: String(c.dateRange ?? 0)},
45+
filterConditions: { label: c.labelFilter, dateRange: dateRange},
4446
};
4547
});
4648

frontend/src/pages/RatioTask/Create/components/RatioConfig.tsx

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
Select,
77
Table,
88
InputNumber,
9+
DatePicker
910
} from "antd";
1011
import { BarChart3 } from "lucide-react";
1112
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
@@ -31,7 +32,7 @@ interface RatioConfigItem {
3132
percentage: number;
3233
source: string; // dataset id
3334
labelFilter?: LabelFilter;
34-
dateRange?: number;
35+
dateRange?: [Date | null, Date | null] | null;
3536
}
3637

3738
interface RatioConfigProps {
@@ -303,20 +304,18 @@ const RatioConfig: FC<RatioConfigProps> = ({
303304
title: "标签更新时间",
304305
dataIndex: "dateRange",
305306
key: "dateRange",
306-
render: (_: any, record: RatioConfigItem) => (
307-
<Select
308-
style={{ width: "140px" }}
309-
placeholder="选择标签更新时间"
310-
value={record.dateRange}
311-
options={TIME_RANGE_OPTIONS}
312-
allowClear
313-
onChange={(value) =>
314-
updateConfig(record.id, {
315-
dateRange: value || undefined,
316-
})
317-
}
318-
/>
319-
),
307+
render: (_: any, record: RatioConfigItem) => {
308+
return (
309+
<DatePicker.RangePicker
310+
value={record.dateRange as any}
311+
onChange={(date) => {
312+
updateConfig(record.id, { dateRange: date });
313+
}}
314+
placeholder={["开始时间", "结束时间"]}
315+
allowClear
316+
/>
317+
);
318+
},
320319
},
321320
{
322321
title: "数量",

runtime/datamate-python/app/module/ratio/schema/ratio_task.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,25 @@ class LabelFilter(BaseModel):
1212
value: Optional[str] = Field(None, description="标签值")
1313

1414
class FilterCondition(BaseModel):
15-
date_range: Optional[str] = Field(None, description="数据范围", alias="dateRange")
15+
date_range: Optional[List[str]] = Field(None, description="数据范围", alias="dateRange")
1616
label: Optional[LabelFilter] = Field(None, description="标签")
1717

1818
@field_validator("date_range")
1919
@classmethod
20-
def validate_date_range(cls, v: Optional[str]) -> Optional[str]:
21-
# ensure it's a numeric string if provided
22-
if not v:
23-
return v
20+
def validate_date_range(cls, date_range: Optional[List[str]]) -> Optional[List[str]]:
21+
# ensure it's a date range if provided
22+
if not date_range or len(date_range) == 0:
23+
return date_range
24+
if len(date_range) != 2:
25+
raise ValueError("date_range must be a list of two date strings: [start, end]")
2426
try:
25-
int(v)
26-
return v
27+
start = datetime.fromisoformat(date_range[0])
28+
end = datetime.fromisoformat(date_range[1])
29+
if start > end:
30+
raise ValueError("date_range start must be earlier than or equal to end")
31+
return date_range
2732
except (ValueError, TypeError) as e:
28-
raise ValueError("date_range must be a numeric string")
33+
raise ValueError("date_range items must be ISO date strings (e.g. YYYY-MM-DD)")
2934

3035
class Config:
3136
# allow population by field name when constructing model programmatically

runtime/datamate-python/app/module/ratio/service/ratio_task.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -271,14 +271,13 @@ def _filter_file(file: DatasetFiles, conditions: FilterCondition) -> bool:
271271
logger.info(f"start filter file: {file}, conditions: {conditions}")
272272

273273
# Check data range condition if provided
274-
if conditions.date_range:
274+
if conditions.date_range and len(conditions.date_range) == 2:
275275
try:
276276
from datetime import datetime, timedelta
277-
data_range_days = int(conditions.date_range)
278-
if data_range_days > 0:
279-
cutoff_date = datetime.now() - timedelta(days=data_range_days)
280-
if file.tags_updated_at and file.tags_updated_at < cutoff_date:
281-
return False
277+
start_at = datetime.fromisoformat(conditions.date_range[0])
278+
end_at = datetime.fromisoformat(conditions.date_range[1])
279+
if file.tags_updated_at and (file.tags_updated_at < start_at or file.tags_updated_at > end_at):
280+
return False
282281
except (ValueError, TypeError) as e:
283282
logger.warning(f"Invalid data_range value: {conditions.date_range}", e)
284283
return False
@@ -294,7 +293,7 @@ def _filter_file(file: DatasetFiles, conditions: FilterCondition) -> bool:
294293
for tag in all_tags:
295294
if conditions.label.label and tag.get("label") != conditions.label.label:
296295
continue
297-
if conditions.label.value is None:
296+
if conditions.label.value is None or len(conditions.label.value) == 0:
298297
return True
299298
if tag.get("value") == conditions.label.value:
300299
return True

0 commit comments

Comments
 (0)