Skip to content

Commit 744d15b

Browse files
authored
fix: 修复评估时模型输出json格式不对导致读取错误的问题 (#133)
* feature: add COT data evaluation function
* fix: add verification of evaluation results
* fix: fix the evaluation prompt
* fix: 修复当评估结果为空导致读取失败的问题
1 parent 31c4966 commit 744d15b

File tree

14 files changed

+372
-218
lines changed

14 files changed

+372
-218
lines changed

frontend/src/pages/DataEvaluation/Create/CreateTask.tsx

Lines changed: 59 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// TypeScript
12
import React, { useState, useEffect } from 'react';
23
import { Button, Form, Input, Select, message, Modal, Row, Col, Table, Space } from 'antd';
34
import { EyeOutlined } from '@ant-design/icons';
@@ -36,6 +37,7 @@ interface CreateTaskModalProps {
3637

3738
const TASK_TYPES = [
3839
{ label: 'QA评估', value: 'QA' },
40+
{ label: 'COT评估', value: 'COT' },
3941
];
4042

4143
const EVAL_METHODS = [
@@ -55,7 +57,7 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
5557
dimension: '',
5658
description: ''
5759
});
58-
const [taskType, setTaskType] = useState<string>("QA");
60+
const [taskType, setTaskType] = useState<string>(DEFAULT_TASK_TYPE);
5961
const [promptTemplates, setPromptTemplates] = useState<PromptTemplate[]>([]);
6062
const [previewVisible, setPreviewVisible] = useState(false);
6163
const [evaluationPrompt, setEvaluationPrompt] = useState('');
@@ -82,9 +84,24 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
8284
fetchDatasets().then();
8385
fetchModels().then();
8486
fetchPromptTemplates().then();
87+
// sync form with local taskType default
88+
form.setFieldsValue({ taskType: DEFAULT_TASK_TYPE });
8589
}
8690
}, [visible]);
8791

92+
// when promptTemplates or taskType change, switch dimensions to template defaults (COT/QA)
93+
useEffect(() => {
94+
if (!promptTemplates || promptTemplates.length === 0) return;
95+
const template = promptTemplates.find(t => t.evalType === taskType);
96+
if (template && template.defaultDimensions) {
97+
setDimensions(template.defaultDimensions.map((dim: any, index: number) => ({
98+
key: `dim-${index}`,
99+
dimension: dim.dimension,
100+
description: dim.description
101+
})));
102+
}
103+
}, [taskType, promptTemplates]);
104+
88105
const fetchDatasets = async () => {
89106
try {
90107
const { data } = await queryDatasetsUsingGet({ page: 1, size: 1000 });
@@ -106,31 +123,46 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
106123
};
107124

108125
const formatDimensionsForPrompt = (dimensions: Dimension[]) => {
109-
let result = "\n";
126+
let result = "";
110127
dimensions.forEach((dim, index) => {
111-
result += `### ${index + 1}. ${dim.dimension}\n**评估标准:**\n${dim.description}\n\n`;
128+
if (index > 0) {
129+
result += "\n";
130+
}
131+
result += `### ${index + 1}. ${dim.dimension}\n**评估标准:**\n${dim.description}`;
132+
if (index < dimensions.length - 1) {
133+
result += "\n";
134+
}
112135
});
113136
return result;
114137
};
115138

116139
const formatResultExample = (dimensions: Dimension[]) => {
117-
return dimensions.map(dim => `\n "${dim.dimension}": "Y",`).join('');
140+
let result = "";
141+
dimensions.forEach((dim, index) => {
142+
if (index > 0) {
143+
result += "\n ";
144+
}
145+
result += `"${dim.dimension}": "Y"`;
146+
if (index < dimensions.length - 1) {
147+
result += ",";
148+
}
149+
});
150+
return result;
118151
};
119152

120153
const fetchPromptTemplates = async () => {
121154
try {
122155
const response = await queryPromptTemplatesUsingGet();
123-
const templates: PromptTemplate[] = response.data?.templates
124-
setPromptTemplates(templates)
125-
if (taskType) {
126-
const template = templates.find(t => t.evalType === taskType);
127-
if (template) {
128-
setDimensions(template.defaultDimensions.map((dim: any, index: number) => ({
129-
key: `dim-${index}`,
130-
dimension: dim.dimension,
131-
description: dim.description
132-
})));
133-
}
156+
const templates: PromptTemplate[] = response.data?.templates || [];
157+
setPromptTemplates(templates);
158+
// if a template exists for current taskType, initialize dimensions (handled also by useEffect)
159+
const template = templates.find(t => t.evalType === taskType);
160+
if (template) {
161+
setDimensions(template.defaultDimensions.map((dim: any, index: number) => ({
162+
key: `dim-${index}`,
163+
dimension: dim.dimension,
164+
description: dim.description
165+
})));
134166
}
135167
} catch (error) {
136168
console.error('Error fetching prompt templates:', error);
@@ -144,8 +176,11 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
144176
return;
145177
}
146178
const template = promptTemplates.find(t => t.evalType === taskType);
147-
setEvaluationPrompt(template?.prompt.replace("{dimensions}", formatDimensionsForPrompt(dimensions))
148-
.replace('{result_example}', formatResultExample(dimensions)));
179+
const basePrompt = template?.prompt || '';
180+
const filled = basePrompt
181+
.replace('{dimensions}', formatDimensionsForPrompt(dimensions))
182+
.replace('{result_example}', formatResultExample(dimensions));
183+
setEvaluationPrompt(filled);
149184
setPreviewVisible(true);
150185
};
151186

@@ -243,6 +278,13 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
243278
evalMethod: DEFAULT_EVAL_METHOD,
244279
taskType: DEFAULT_TASK_TYPE,
245280
}}
281+
onValuesChange={(changed) => {
282+
if (changed.taskType) {
283+
setTaskType(changed.taskType);
284+
setEvaluationPrompt('');
285+
setPreviewVisible(false);
286+
}
287+
}}
246288
>
247289
<Row gutter={16}>
248290
<Col span={12}>

frontend/src/pages/DataEvaluation/Detail/components/EvaluationItems.tsx

Lines changed: 53 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import { useEffect, useState } from 'react';
2-
import { Table, Typography, Button, Space, Spin, Empty, message, Tooltip } from 'antd';
2+
import { Table, Typography, Button, Space, Empty, Tooltip } from 'antd';
33
import { FolderOpen, FileText, ArrowLeft } from 'lucide-react';
44
import { queryEvaluationFilesUsingGet, queryEvaluationItemsUsingGet } from '../../evaluation.api';
5+
import useFetchData from '@/hooks/useFetchData';
56

67
const { Text } = Typography;
78

@@ -39,63 +40,52 @@ type EvalItem = {
3940
};
4041

4142
export default function EvaluationItems({ task }: { task: any }) {
42-
const [loadingFiles, setLoadingFiles] = useState<boolean>(false);
43-
const [files, setFiles] = useState<EvalFile[]>([]);
44-
const [filePagination, setFilePagination] = useState({ current: 1, pageSize: 10, total: 0 });
45-
4643
const [selectedFile, setSelectedFile] = useState<{ fileId: string; fileName: string } | null>(null);
47-
const [loadingItems, setLoadingItems] = useState<boolean>(false);
48-
const [items, setItems] = useState<EvalItem[]>([]);
49-
const [itemPagination, setItemPagination] = useState({ current: 1, pageSize: 10, total: 0 });
5044

51-
// Fetch files list
52-
useEffect(() => {
53-
if (!task?.id || selectedFile) return;
54-
const fetchFiles = async () => {
55-
setLoadingFiles(true);
56-
try {
57-
const res = await queryEvaluationFilesUsingGet({ taskId: task.id, page: filePagination.current, size: filePagination.pageSize });
58-
const data = res?.data;
59-
const list: EvalFile[] = data?.content || [];
60-
setFiles(list);
61-
setFilePagination((p) => ({ ...p, total: data?.totalElements || 0 }));
62-
} catch (e) {
63-
message.error('加载评估文件失败');
64-
console.error(e);
65-
} finally {
66-
setLoadingFiles(false);
45+
// 文件列表数据(使用 useFetchData),pageOffset=0 表示后端分页为 1 基
46+
const {
47+
loading: loadingFiles,
48+
tableData: files,
49+
pagination: filePagination,
50+
setSearchParams: setFileSearchParams,
51+
} = useFetchData<EvalFile>(
52+
(params) => queryEvaluationFilesUsingGet({ taskId: task?.id, ...params }),
53+
(d) => d as unknown as EvalFile,
54+
30000,
55+
false,
56+
[],
57+
0
58+
);
59+
60+
// 评估条目数据(使用 useFetchData),依赖选中文件
61+
const {
62+
loading: loadingItems,
63+
tableData: items,
64+
pagination: itemPagination,
65+
setSearchParams: setItemSearchParams,
66+
fetchData: fetchItems,
67+
} = useFetchData<EvalItem>(
68+
(params) => {
69+
if (!task?.id || !selectedFile?.fileId) {
70+
return Promise.resolve({ data: { content: [], totalElements: 0 } });
6771
}
68-
};
69-
fetchFiles();
70-
// eslint-disable-next-line react-hooks/exhaustive-deps
71-
}, [task?.id, filePagination.current, filePagination.pageSize, selectedFile]);
72+
return queryEvaluationItemsUsingGet({ taskId: task.id, file_id: selectedFile.fileId, ...params });
73+
},
74+
(d) => d as unknown as EvalItem,
75+
30000,
76+
false,
77+
[],
78+
0
79+
);
7280

73-
// Fetch items of selected file
81+
// 当选择文件变化时,主动触发一次条目查询,避免仅依赖 searchParams 变更导致未触发
7482
useEffect(() => {
75-
if (!task?.id || !selectedFile) return;
76-
const fetchItems = async () => {
77-
setLoadingItems(true);
78-
try {
79-
const res = await queryEvaluationItemsUsingGet({
80-
taskId: task.id,
81-
page: itemPagination.current,
82-
size: itemPagination.pageSize,
83-
file_id: selectedFile.fileId,
84-
});
85-
const data = res?.data;
86-
const list: EvalItem[] = data?.content || [];
87-
setItems(list);
88-
setItemPagination((p) => ({ ...p, total: data?.totalElements || 0 }));
89-
} catch (e) {
90-
message.error('加载评估条目失败');
91-
console.error(e);
92-
} finally {
93-
setLoadingItems(false);
94-
}
95-
};
96-
fetchItems();
97-
// eslint-disable-next-line react-hooks/exhaustive-deps
98-
}, [task?.id, selectedFile?.fileId, itemPagination.current, itemPagination.pageSize]);
83+
if (task?.id && selectedFile?.fileId) {
84+
setItemSearchParams((prev: any) => ({ ...prev, current: 1 }));
85+
// 立即拉取一次,保证点击后立刻出现数据
86+
fetchItems();
87+
}
88+
}, [task?.id, selectedFile?.fileId]);
9989

10090
const fileColumns = [
10191
{
@@ -228,19 +218,20 @@ export default function EvaluationItems({ task }: { task: any }) {
228218
dataSource={files}
229219
loading={loadingFiles}
230220
size="middle"
231-
onRow={(record) => ({ onClick: () => setSelectedFile({ fileId: record.fileId, fileName: record.fileName }) })}
232-
pagination={{
233-
current: filePagination.current,
234-
pageSize: filePagination.pageSize,
235-
total: filePagination.total,
236-
onChange: (current, pageSize) => setFilePagination({ current, pageSize, total: filePagination.total }),
237-
}}
221+
onRow={(record) => ({
222+
onClick: () => {
223+
setSelectedFile({ fileId: record.fileId, fileName: record.fileName });
224+
// 切换文件时,重置条目表到第一页
225+
setItemSearchParams((prev: any) => ({ ...prev, current: 1 }));
226+
},
227+
})}
228+
pagination={filePagination}
238229
/>
239230
) : (
240231
<div className="flex flex-col gap-3">
241232
<div className="sticky top-0 z-10 bg-white py-2" style={{ borderBottom: '1px solid #f0f0f0' }}>
242233
<Space wrap>
243-
<Button icon={<ArrowLeft size={16} />} onClick={() => { setSelectedFile(null); setItems([]); }}>
234+
<Button icon={<ArrowLeft size={16} />} onClick={() => { setSelectedFile(null); }}>
244235
返回文件列表
245236
</Button>
246237
<Space>
@@ -257,12 +248,7 @@ export default function EvaluationItems({ task }: { task: any }) {
257248
dataSource={items}
258249
loading={loadingItems}
259250
size="middle"
260-
pagination={{
261-
current: itemPagination.current,
262-
pageSize: itemPagination.pageSize,
263-
total: itemPagination.total,
264-
onChange: (current, pageSize) => setItemPagination({ current, pageSize, total: itemPagination.total }),
265-
}}
251+
pagination={itemPagination}
266252
/>
267253
</div>
268254
)}

frontend/src/pages/DataEvaluation/Home/DataEvaluation.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,14 @@ export default function DataEvaluationPage() {
8282
label: '任务类型',
8383
options: [
8484
{ value: 'QA', label: 'QA评估' },
85+
{ value: 'COT', label: 'COT评估' },
8586
],
8687
},
8788
{
8889
key: 'evalMethod',
8990
label: '评估方式',
9091
options: [
9192
{ value: 'AUTO', label: '自动评估' },
92-
{ value: 'MANUAL', label: '人工评估' },
9393
],
9494
},
9595
];

runtime/datamate-python/app/db/models/data_evaluation.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class EvaluationTask(Base):
3232
source_id = Column(String(36), nullable=True, comment="待评估对象ID")
3333
source_name = Column(String(255), nullable=True, comment="待评估对象名称")
3434
status = Column(String(50), server_default="PENDING", nullable=False, comment="状态:PENDING/RUNNING/COMPLETED/STOPPED/FAILED")
35+
eval_method = Column(String(50), server_default="AUTO", nullable=False, comment="评估方式:AUTO/MANUAL")
3536
eval_process = Column(Float, nullable=False, server_default="0", comment="评估进度")
3637
eval_prompt = Column(Text, nullable=True, comment="评估提示词")
3738
eval_config = Column(Text, nullable=True, comment="评估配置")

0 commit comments

Comments
 (0)