Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ default Map<String, Map<String, Long>> getDistribution(List<DatasetFile> dataset
for (DatasetFile datasetFile : datasetFiles) {
List<FileTag> tags = datasetFile.analyzeTag();
if (CollectionUtils.isEmpty(tags)) {
return distribution;
continue;
}
for (FileTag tag : tags) {
Map<String, Long> tagValueMap = distribution.getOrDefault(tag.getFromName(), new HashMap<>());
Expand Down
12 changes: 7 additions & 5 deletions frontend/src/pages/DataManagement/dataset.const.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
FileImage,
FileText,
Video,
Film,
FileCode,
MessageCircleMore,
ImagePlus,
Expand Down Expand Up @@ -45,7 +46,7 @@ export const datasetTypeMap: Record<
label: "文本",
order: 1,
icon: ScanText,
iconColor: "blue",
iconColor: "#A78BFA",
children: [
DatasetSubType.TEXT_DOCUMENT,
DatasetSubType.TEXT_WEB,
Expand All @@ -58,7 +59,7 @@ export const datasetTypeMap: Record<
label: "图像",
order: 2,
icon: Image,
iconColor: "green",
iconColor: "#38BDF8",
children: [DatasetSubType.IMAGE_IMAGE, DatasetSubType.IMAGE_CAPTION],
description: "用于处理和分析图像数据的数据集",
},
Expand All @@ -67,16 +68,16 @@ export const datasetTypeMap: Record<
label: "音频",
order: 3,
icon: Music,
iconColor: "orange",
iconColor: "#F59E0B",
children: [DatasetSubType.AUDIO_AUDIO, DatasetSubType.AUDIO_JSONL],
description: "用于处理和分析音频数据的数据集",
},
[DatasetType.VIDEO]: {
value: DatasetType.VIDEO,
label: "视频",
order: 3,
icon: Video,
iconColor: "purple",
icon: Film,
iconColor: "#22D3EE",
children: [DatasetSubType.VIDEO_VIDEO, DatasetSubType.VIDEO_JSONL],
description: "用于处理和分析视频数据的数据集",
},
Expand Down Expand Up @@ -206,6 +207,7 @@ export function mapDataset(dataset: AnyObject): Dataset {
createdAt: formatDateTime(dataset.createdAt) || "--",
updatedAt: formatDateTime(dataset?.updatedAt) || "--",
icon: IconComponent ? <IconComponent className="w-full h-full" /> : <Database />,
iconColor: iconColor,
status: datasetStatusMap[dataset.status],
statistics: [
{ label: "文件数", value: dataset.fileCount || 0 },
Expand Down
4 changes: 3 additions & 1 deletion frontend/src/pages/RatioTask/Create/CreateRatioTask.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { useNavigate } from "react-router";
import SelectDataset from "@/pages/RatioTask/Create/components/SelectDataset.tsx";
import BasicInformation from "@/pages/RatioTask/Create/components/BasicInformation.tsx";
import RatioConfig from "@/pages/RatioTask/Create/components/RatioConfig.tsx";
import {formatDate} from "@/utils/unit.ts";

export default function CreateRatioTask() {
const navigate = useNavigate();
Expand Down Expand Up @@ -37,10 +38,11 @@ export default function CreateRatioTask() {
}
const totals = String(values.totalTargetCount);
const config = ratioTaskForm.ratioConfigs.map((c) => {
const dateRange = c.dateRange ? [formatDate(c.dateRange[0]), formatDate(c.dateRange[1])] : []
return {
datasetId: c.source,
counts: String(c.quantity ?? 0),
filterConditions: { label: c.labelFilter, dateRange: String(c.dateRange ?? 0)},
filterConditions: { label: c.labelFilter, dateRange: dateRange},
};
});

Expand Down
29 changes: 14 additions & 15 deletions frontend/src/pages/RatioTask/Create/components/RatioConfig.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import {
Select,
Table,
InputNumber,
DatePicker
} from "antd";
import { BarChart3 } from "lucide-react";
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
Expand All @@ -31,7 +32,7 @@ interface RatioConfigItem {
percentage: number;
source: string; // dataset id
labelFilter?: LabelFilter;
dateRange?: number;
dateRange?: [Date | null, Date | null] | null;
}

interface RatioConfigProps {
Expand Down Expand Up @@ -303,20 +304,18 @@ const RatioConfig: FC<RatioConfigProps> = ({
title: "标签更新时间",
dataIndex: "dateRange",
key: "dateRange",
render: (_: any, record: RatioConfigItem) => (
<Select
style={{ width: "140px" }}
placeholder="选择标签更新时间"
value={record.dateRange}
options={TIME_RANGE_OPTIONS}
allowClear
onChange={(value) =>
updateConfig(record.id, {
dateRange: value || undefined,
})
}
/>
),
render: (_: any, record: RatioConfigItem) => {
return (
<DatePicker.RangePicker
value={record.dateRange as any}
onChange={(date) => {
updateConfig(record.id, { dateRange: date });
}}
placeholder={["开始时间", "结束时间"]}
allowClear
/>
);
},
},
{
title: "数量",
Expand Down
21 changes: 13 additions & 8 deletions runtime/datamate-python/app/module/ratio/schema/ratio_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,25 @@ class LabelFilter(BaseModel):
value: Optional[str] = Field(None, description="标签值")

class FilterCondition(BaseModel):
date_range: Optional[str] = Field(None, description="数据范围", alias="dateRange")
date_range: Optional[List[str]] = Field(None, description="数据范围", alias="dateRange")
label: Optional[LabelFilter] = Field(None, description="标签")

@field_validator("date_range")
@classmethod
def validate_date_range(cls, v: Optional[str]) -> Optional[str]:
# ensure it's a numeric string if provided
if not v:
return v
def validate_date_range(cls, date_range: Optional[List[str]]) -> Optional[List[str]]:
# ensure it's a date range if provided
if not date_range or len(date_range) == 0:
return date_range
if len(date_range) != 2:
raise ValueError("date_range must be a list of two date strings: [start, end]")
try:
int(v)
return v
start = datetime.fromisoformat(date_range[0])
end = datetime.fromisoformat(date_range[1])
if start > end:
raise ValueError("date_range start must be earlier than or equal to end")
return date_range
except (ValueError, TypeError) as e:
raise ValueError("date_range must be a numeric string")
raise ValueError("date_range items must be ISO date strings (e.g. YYYY-MM-DD)")

class Config:
# allow population by field name when constructing model programmatically
Expand Down
13 changes: 6 additions & 7 deletions runtime/datamate-python/app/module/ratio/service/ratio_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,14 +271,13 @@ def _filter_file(file: DatasetFiles, conditions: FilterCondition) -> bool:
logger.info(f"start filter file: {file}, conditions: {conditions}")

# Check data range condition if provided
if conditions.date_range:
if conditions.date_range and len(conditions.date_range) == 2:
try:
from datetime import datetime, timedelta
data_range_days = int(conditions.date_range)
if data_range_days > 0:
cutoff_date = datetime.now() - timedelta(days=data_range_days)
if file.tags_updated_at and file.tags_updated_at < cutoff_date:
return False
start_at = datetime.fromisoformat(conditions.date_range[0])
end_at = datetime.fromisoformat(conditions.date_range[1])
if file.tags_updated_at and (file.tags_updated_at < start_at or file.tags_updated_at > end_at):
return False
except (ValueError, TypeError) as e:
logger.warning(f"Invalid data_range value: {conditions.date_range}", e)
return False
Expand All @@ -294,7 +293,7 @@ def _filter_file(file: DatasetFiles, conditions: FilterCondition) -> bool:
for tag in all_tags:
if conditions.label.label and tag.get("label") != conditions.label.label:
continue
if conditions.label.value is None:
if conditions.label.value is None or len(conditions.label.value) == 0:
return True
if tag.get("value") == conditions.label.value:
return True
Expand Down
Loading