diff --git a/.github/workflows/docker-image-backend-python.yml b/.github/workflows/docker-image-backend-python.yml index a29ba942..65d9755d 100644 --- a/.github/workflows/docker-image-backend-python.yml +++ b/.github/workflows/docker-image-backend-python.yml @@ -5,12 +5,14 @@ on: branches: [ "main" ] paths: - 'scripts/images/datamate-python/**' + - 'runtime/datamate-python/**' - '.github/workflows/docker-image-backend-python.yml' - '.github/workflows/docker-images-reusable.yml' pull_request: branches: [ "main" ] paths: - 'scripts/images/datamate-python/**' + - 'runtime/datamate-python/**' - '.github/workflows/docker-image-backend-python.yml' - '.github/workflows/docker-images-reusable.yml' workflow_dispatch: diff --git a/deployment/docker/milvus/docker-compose.yml b/deployment/docker/milvus/docker-compose.yml index 9385ce09..6d17b2ef 100644 --- a/deployment/docker/milvus/docker-compose.yml +++ b/deployment/docker/milvus/docker-compose.yml @@ -10,6 +10,7 @@ services: volumes: - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd + restart: always networks: - datamate healthcheck: diff --git a/frontend/src/components/business/DatasetFileTransfer.tsx b/frontend/src/components/business/DatasetFileTransfer.tsx index 42bca6c0..d2e54c42 100644 --- a/frontend/src/components/business/DatasetFileTransfer.tsx +++ b/frontend/src/components/business/DatasetFileTransfer.tsx @@ -1,4 +1,4 @@ -import React, { useEffect } from "react"; +import React, { useCallback, useEffect } from "react"; import { Button, Input, Table } from "antd"; import { RightOutlined } from "@ant-design/icons"; import { mapDataset } from "@/pages/DataManagement/dataset.const"; @@ -19,6 +19,7 @@ interface DatasetFileTransferProps open: boolean; selectedFilesMap: { [key: string]: DatasetFile }; onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void; + onDatasetSelect?: (dataset: Dataset | null) => void; } const fileCols = [ @@ -48,6 +49,7 @@ const DatasetFileTransfer: React.FC = ({ open, selectedFilesMap, onSelectedFilesChange, + onDatasetSelect, ...props }) => { const [datasets, setDatasets] = React.useState([]); @@ -96,7 +98,7 @@ const DatasetFileTransfer: React.FC = ({ 300 ); - const fetchFiles = async () => { + const fetchFiles = useCallback(async () => { if (!selectedDataset) return; const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, { page: filesPagination.current - 1, @@ -104,23 +106,25 @@ const DatasetFileTransfer: React.FC = ({ keyword: filesSearch, }); setFiles( - data.content.map((item) => ({ + (data.content || []).map((item: DatasetFile) => ({ ...item, key: item.id, datasetName: selectedDataset.name, - })) || [] + })) ); setFilesPagination((prev) => ({ ...prev, total: data.totalElements, })); - }; + }, [filesPagination.current, filesPagination.pageSize, filesSearch, selectedDataset]); useEffect(() => { - if (selectedDataset) { - fetchFiles(); - } - }, [selectedDataset]); + fetchFiles().catch(() => {}); + }, [fetchFiles]); + + useEffect(() => { + onDatasetSelect?.(selectedDataset); + }, [selectedDataset, onDatasetSelect]); const toggleSelectFile = (record: DatasetFile) => { if (!selectedFilesMap[record.id]) { @@ -147,8 +151,9 @@ const DatasetFileTransfer: React.FC = ({ setShowFiles(false); setSelectedDataset(null); setDatasetSelections([]); + onDatasetSelect?.(null); } - }, [open]); + }, [open, onDatasetSelect]); const datasetCols = [ { @@ -206,7 +211,15 @@ const DatasetFileTransfer: React.FC = ({ })} dataSource={datasets} columns={datasetCols} - pagination={datasetPagination} + pagination={{ + ...datasetPagination, + onChange: (page, pageSize) => + setDatasetPagination({ + current: page, + pageSize: pageSize || datasetPagination.pageSize, + total: datasetPagination.total, + }), + }} /> @@ -231,21 +244,11 @@ const DatasetFileTransfer: React.FC = ({ })} rowSelection={{ type: "checkbox", - onSelectAll: (selected, _, changeRows) => { - const newSelectedFiles = { ...selectedFilesMap }; - if (selected) { - changeRows.forEach((row) => { - newSelectedFiles[row.id] = row; - }); - } else { - changeRows.forEach((row) => { - delete newSelectedFiles[row.id]; - }); - } - onSelectedFilesChange(newSelectedFiles); - }, selectedRowKeys: Object.keys(selectedFilesMap), onSelect: toggleSelectFile, + getCheckboxProps: (record: DatasetFile) => ({ + name: record.fileName, + }), }} /> diff --git a/frontend/src/pages/SynthesisTask/CreateTask.tsx b/frontend/src/pages/SynthesisTask/CreateTask.tsx index 72eae101..357f75fb 100644 --- a/frontend/src/pages/SynthesisTask/CreateTask.tsx +++ b/frontend/src/pages/SynthesisTask/CreateTask.tsx @@ -1,389 +1,242 @@ import { useEffect, useState } from "react"; -import type { DatasetFile } from "@/pages/DataManagement/dataset.model"; -import { - Steps, - Card, - Select, - Input, - Checkbox, - Button, - Badge, - Divider, - Radio, - Form, - message, -} from "antd"; -import { - Eye, - Trash2, - Settings, - ArrowLeft, - ArrowRight, - Play, - Edit, - Copy, - Save, - RefreshCw, - ChevronDown, - ChevronRight, - Search, - CheckCircle, - Code, - X, - MoreHorizontal, - Activity, - MessageSquare, - Brain, -} from "lucide-react"; +import type { Dataset, DatasetFile } from "@/pages/DataManagement/dataset.model"; +import { Steps, Card, Select, Input, Checkbox, Button, Form, message } from "antd"; +import { Eye, ArrowLeft, ArrowRight, Play, Search, MoreHorizontal } from "lucide-react"; import { Link, useNavigate } from "react-router"; import { queryDatasetsUsingGet } from "../DataManagement/dataset.api"; import DatasetFileTransfer from "@/components/business/DatasetFileTransfer"; +import { createSynthesisTaskUsingPost, getPromptByTypeUsingGet } from "./synthesis-api"; +import { queryModelListUsingGet } from "@/pages/SettingsPage/settings.apis"; +import type { ModelI } from "@/pages/SettingsPage/ModelAccess"; const { TextArea } = Input; +interface CreateTaskFormValues { + name: string; + sourceDataset: string; + description?: string; +} + +interface CreateTaskApiResponse { + code?: string | number; + message?: string; + data?: unknown; + success?: boolean; +} + export default function SynthesisTaskCreate() { const navigate = useNavigate(); const [form] = Form.useForm(); - const [searchQuery, setSearchQuery] = useState(""); const [createStep, setCreateStep] = useState(1); const [selectedFiles, setSelectedFiles] = useState([]); - const [selectedMap, setSelectedMap] = useState>( - {} - ); - const [files] = useState([]); - const [selectedSynthesisTypes, setSelectedSynthesisTypes] = useState< - string[] - >(["qa_judge"]); - const [showDebugCard, setShowDebugCard] = useState(false); - const [debugStepId, setDebugStepId] = useState(null); - const [expandedTypes, setExpandedTypes] = useState([ - "qa", - "distillation", - ]); + const [selectedMap, setSelectedMap] = useState>({}); + const [selectedDataset, setSelectedDataset] = useState(null); + const [selectedSynthesisTypes, setSelectedSynthesisTypes] = useState(["qa"]); + const [taskType, setTaskType] = useState<"qa" | "cot">("qa"); + const [promptTemplate, setPromptTemplate] = useState(""); + const [submitting, setSubmitting] = useState(false); + const [modelOptions, setModelOptions] = useState<{ label: string; value: string }[]>([]); + const [modelsLoading, setModelsLoading] = useState(false); + const [selectedModel, setSelectedModel] = useState(undefined); + const [sliceConfig, setSliceConfig] = useState({ + processType: "DEFAULT_CHUNK" as + | "DEFAULT_CHUNK" + | "CHAPTER_CHUNK" + | "PARAGRAPH_CHUNK" + | "FIXED_LENGTH_CHUNK" + | "CUSTOM_SEPARATOR_CHUNK", + chunkSize: 500, + overlapSize: 50, + delimiter: "", + }); + const sliceOptions = [ + { label: "默认分块", value: "DEFAULT_CHUNK" }, + { label: "按章节分块", value: "CHAPTER_CHUNK" }, + { label: "按段落分块", value: "PARAGRAPH_CHUNK" }, + { label: "固定长度分块", value: "FIXED_LENGTH_CHUNK" }, + { label: "自定义分隔符分块", value: "CUSTOM_SEPARATOR_CHUNK" }, + ]; const fetchDatasets = async () => { const { data } = await queryDatasetsUsingGet({ page: 1, size: 1000 }); - setDatasets(data.content || []); + return data; + }; + + const fetchPrompt = async (type: "qa" | "cot") => { + try { + const synthTypeParam = type.toUpperCase(); + const res = await getPromptByTypeUsingGet(synthTypeParam); + const prompt = typeof res === "string" ? res : (res as { data?: string })?.data ?? ""; + setPromptTemplate(prompt || ""); + } catch (e) { + console.error(e); + message.error("获取提示词模板失败"); + setPromptTemplate(""); + } }; useEffect(() => { fetchDatasets(); }, []); + useEffect(() => { + fetchPrompt(taskType); + }, [taskType]); + + useEffect(() => { + const loadModels = async () => { + setModelsLoading(true); + try { + const { data } = await queryModelListUsingGet({ page: 0, size: 1000 }); + const options = (data?.content || []).map((model: ModelI) => ({ + label: `${model.modelName} (${model.provider})`, + value: model.id, + })); + setModelOptions(options); + } catch (error) { + console.error("加载模型列表失败", error); + } finally { + setModelsLoading(false); + } + }; + loadModels(); + }, []); + + useEffect(() => { + if (!selectedModel && modelOptions.length > 0) { + setSelectedModel(modelOptions[0].value); + } + }, [modelOptions, selectedModel]); + // 表单数据 - const [formValues, setFormValues] = useState({ + const [formValues, setFormValues] = useState({ name: "", sourceDataset: "", - targetCount: 1000, description: "", - executionMode: "immediate", - scheduleStrategy: "", - outputPath: "", - enableQualityCheck: false, - enableNotification: false, }); - const synthesisTypes = [ - { - id: "qa", - name: "生成问答对", - icon: MessageSquare, - count: 14, - expanded: true, - description: "基于文本生成各类问答对", - children: [ - { - id: "qa_judge", - name: "文字生成问答对_判断题", - count: 1, - description: "生成判断题形式的问答对", - }, - { - id: "qa_choice", - name: "文字生成问答对_选择题", - count: 0, - description: "生成多选题形式的问答对", - }, - { - id: "qa_fill", - name: "文字生成问答对_填空题", - count: 0, - description: "生成填空题形式的问答对", - }, - { - id: "qa_short", - name: "相关文本描述问答对_金融领域", - count: 0, - description: "金融领域的专业问答对", - }, - ], - }, - { - id: "distillation", - name: "生成蒸馏", - icon: Brain, - count: 6, - expanded: true, - description: "知识蒸馏数据生成", - children: [ - { - id: "dist_text", - name: "相关文本生成蒸馏", - count: 0, - description: "基于文本的知识蒸馏", - }, - { - id: "dist_qa", - name: "问答数据", - count: 0, - description: "问答形式的蒸馏数据", - }, - { - id: "dist_instruct", - name: "相关指令生成蒸馏问题_few-shot", - count: 0, - description: "Few-shot指令蒸馏", - }, - { - id: "dist_summary", - name: "问答数据为基础蒸馏", - count: 0, - description: "基于问答数据的蒸馏", - }, - { - id: "dist_reasoning", - name: "问答数据为基础高质量", - count: 0, - description: "高质量推理数据蒸馏", - }, - ], - }, - ]; - - const toggleTypeExpansion = (typeId: string) => { - setExpandedTypes((prev) => - prev.includes(typeId) - ? prev.filter((id) => id !== typeId) - : [...prev, typeId] - ); + const handleValuesChange: NonNullable[0]["onValuesChange"]> = ( + _changed, + allValues + ) => { + setFormValues(allValues as CreateTaskFormValues); }; - const handleSynthesisTypeSelect = (typeId: string) => { - setSelectedSynthesisTypes((prev) => { - if (prev.includes(typeId)) { - return prev.filter((id) => id !== typeId); - } else { - return [...prev, typeId]; - } - }); - }; - - const handleValuesChange = (_, allValues) => { - setFormValues({ ...formValues, ...allValues }); - }; - - const handleSelectAllFiles = () => { - const filteredFiles = files.filter((file) => - file.name.toLowerCase().includes(searchQuery.toLowerCase()) - ); - if (selectedFiles.length === filteredFiles.length) { - setSelectedFiles([]); - } else { - setSelectedFiles(filteredFiles.map((file) => file.id)); - } - }; - - const handleRemoveSelectedFile = (fileId: string) => { - setSelectedFiles(selectedFiles.filter((id) => id !== fileId)); - }; + // 当选择文件变化时,同步 selectedFiles 为 ID 列表 + useEffect(() => { + const ids = Object.values(selectedMap).map((f) => String(f.id)); + setSelectedFiles(ids); + }, [selectedMap]); const handleCreateTask = async () => { try { - const values = await form.validateFields(); - if ( - !values.name || - !values.sourceDataset || - selectedFiles.length === 0 || - selectedSynthesisTypes.length === 0 || - !values.outputPath || - !values.targetCount || - (values.executionMode === "scheduled" && !values.scheduleStrategy) - ) { - message.error("请填写所有必填项"); + const values = (await form.validateFields()) as CreateTaskFormValues; + // precise validation + if (!(taskType === "qa" || taskType === "cot")) { + message.error("请选择一个合成类型"); + return; + } + if (!selectedModel) { + message.error("请选择模型"); + return; + } + if (selectedFiles.length === 0) { + message.error("请至少选择一个文件"); return; } - const newTask: SynthesisTask = { - id: Date.now(), - name: values.name, - type: selectedSynthesisTypes[0].includes("qa") ? "qa" : "distillation", - status: values.executionMode === "immediate" ? "pending" : "paused", - progress: 0, - sourceDataset: values.sourceDataset, - targetCount: values.targetCount, - generatedCount: 0, - createdAt: new Date().toISOString().split("T")[0], - template: "自动生成模板", - estimatedTime: "预计 30 分钟", + // 构造后端要求的参数格式 + const payload = { + name: values.name || form.getFieldValue("name"), // 必选,确保传递 + description: values.description ?? "", // 可选,始终传递 + model_id: selectedModel, + source_file_id: selectedFiles, + text_split_config: { + chunk_size: sliceConfig.chunkSize, + chunk_overlap: sliceConfig.overlapSize, + }, + synthesis_config: { + prompt_template: promptTemplate, + }, + synthesis_type: taskType === "qa" ? "QA" : "COT", }; - setTasks([newTask, ...tasks]); - setShowCreateTask(false); - setCreateStep(1); + setSubmitting(true); + const res = (await createSynthesisTaskUsingPost( + payload as unknown as Record + )) as CreateTaskApiResponse; - // Reset form - form.resetFields(); - setSelectedFiles([]); + const ok = + res?.success === true || + res?.code === "0" || + res?.code === 0 || + typeof res?.data !== "undefined"; - // Auto-start simulation if immediate execution - if (values.executionMode === "immediate") { - setTimeout(() => { - setTasks((prev) => - prev.map((task) => - task.id === newTask.id ? { ...task, status: "running" } : task - ) - ); - - const interval = setInterval(() => { - setTasks((prev) => - prev.map((task) => { - if (task.id === newTask.id && task.status === "running") { - const newProgress = Math.min( - task.progress + Math.random() * 8 + 2, - 100 - ); - const isCompleted = newProgress >= 100; - return { - ...task, - progress: newProgress, - generatedCount: Math.floor( - (newProgress / 100) * task.targetCount - ), - status: isCompleted ? "completed" : "running", - estimatedTime: isCompleted - ? "已完成" - : `剩余 ${Math.ceil((100 - newProgress) / 10)} 分钟`, - }; - } - return task; - }) - ); - }, 1000); - - setTimeout(() => clearInterval(interval), 12000); - }, 1000); + if (ok) { + message.success("合成任务创建成功"); + navigate("/data/synthesis/task"); + } else { + message.error(res?.message || "合成任务创建失败"); + } + } catch (error) { + if (typeof error === "object" && error && "errorFields" in error) { + message.error("请填写所有必填项"); + return; } - } catch { - // 校验失败 + console.error(error); + message.error((error instanceof Error ? error.message : "合成任务创建失败")); + } finally { + setSubmitting(false); } }; + // 仅两个一级类型,无二级目录 + const synthesisTypes = [ + { id: "qa", name: "生成问答对" }, + { id: "cot", name: "生成COT链式推理" }, + ] as const; + + const handleSynthesisTypeSelect = (typeId: "qa" | "cot") => { + setSelectedSynthesisTypes((prev) => { + const next = prev.includes(typeId) ? [] : [typeId]; + if (next[0] === "qa") setTaskType("qa"); + if (next[0] === "cot") setTaskType("cot"); + return next; + }); + }; + + useEffect(() => { + // 进入第二步时,若未选择类型,默认选择 QA,避免误报 + if (createStep === 2 && !(taskType === "qa" || taskType === "cot")) { + setTaskType("qa"); + setSelectedSynthesisTypes(["qa"]); + } + }, [createStep, taskType]); + const renderCreateTaskPage = () => { if (createStep === 1) { return (
-
+

基本信息

- + - - - -