|
1 | 1 | import uuid |
2 | | -from xml.etree.ElementTree import tostring |
3 | 2 |
|
4 | | -from sqlalchemy import Column, String, Text, Integer, JSON, TIMESTAMP, ForeignKey, func |
5 | | -from sqlalchemy.orm import relationship |
| 3 | +from sqlalchemy import Column, String, Text, Integer, JSON, TIMESTAMP, func |
6 | 4 |
|
7 | 5 | from app.db.session import Base |
8 | 6 | from app.module.generation.schema.generation import CreateSynthesisTaskRequest |
9 | 7 |
|
10 | 8 |
|
def _dump_config(section):
    """Return a config section as a plain dict suitable for a JSON column.

    Pydantic models are serialised through ``model_dump()``; ``None`` becomes
    an empty dict; anything else is passed through ``dict()``.
    """
    if section is None:
        return {}
    if hasattr(section, "model_dump"):
        return section.model_dump()
    return dict(section)


def _build_synth_config(synthesis_task):
    """Build the value stored in the ``synth_config`` column.

    A request that already exposes ``synth_config`` is used as-is. Otherwise
    the legacy ``text_split_config`` and ``synthesis_config`` sections are
    merged into one dict, keyed by their original field names so that the two
    sections cannot overwrite each other's keys.
    """
    direct = getattr(synthesis_task, "synth_config", None)
    if direct is not None:
        return _dump_config(direct)

    # NOTE(review): the nested layout below is an assumption -- confirm it
    # against whatever code reads `synth_config` back.
    merged = {}
    for field_name in ("text_split_config", "synthesis_config"):
        section = getattr(synthesis_task, field_name, None)
        if section is not None:
            merged[field_name] = _dump_config(section)
    return merged


async def save_synthesis_task(db_session, synthesis_task: CreateSynthesisTaskRequest):
    """Persist a new data-synthesis task and return the stored instance.

    Only the columns present on ``t_data_synth_instances`` are written
    (``synth_type``, ``synth_config`` and the bookkeeping counters). The table
    has no ``model_id``, ``text_split_config``, ``source_file_id`` or
    ``result_data_location`` columns, so that information is expected to be
    managed by the caller or by other tables.

    Args:
        db_session: Async database session; it is committed and then used to
            refresh the new row.
        synthesis_task: Incoming request. The legacy field names are still
            accepted: ``synthesis_type`` is stored as ``synth_type``, and
            ``text_split_config`` + ``synthesis_config`` are merged into
            ``synth_config``.

    Returns:
        The refreshed ``DataSynthInstance`` that was just inserted.
    """
    gid = str(uuid.uuid4())

    synth_task_instance = DataSynthInstance(
        id=gid,
        name=synthesis_task.name,
        description=synthesis_task.description,
        status="pending",
        synth_type=synthesis_task.synthesis_type.value,
        progress=0,
        # The request may not define `synth_config` at all; build the stored
        # value from whichever config sections it actually carries.
        synth_config=_build_synth_config(synthesis_task),
        # `source_file_id` may be None/empty; count that as zero files.
        total_files=len(synthesis_task.source_file_id or []),
        processed_files=0,
        total_chunks=0,
        processed_chunks=0,
        total_synth_data=0,
        created_at=func.now(),
        updated_at=func.now(),
        created_by="system",
        updated_by="system",
    )
    db_session.add(synth_task_instance)
    await db_session.commit()
    # Reload so the returned object carries the values stored by the database.
    await db_session.refresh(synth_task_instance)
    return synth_task_instance
41 | 44 |
|
42 | 45 |
|
class DataSynthInstance(Base):
    """ORM model for data-synthesis tasks, mapped to ``t_data_synth_instances``.

    Columns, as recorded in the table DDL this model mirrors:

        id                VARCHAR(36) utf8mb4, primary key    UUID
        name              VARCHAR(255) NOT NULL               task name
        description       TEXT                                task description
        status            VARCHAR(20)                         task status
        synth_type        VARCHAR(20) NOT NULL                synthesis type
        progress          INT DEFAULT 0                       progress (percent)
        synth_config      JSON NOT NULL                       synthesis config
        total_files       INT DEFAULT 0                       total file count
        processed_files   INT DEFAULT 0                       processed file count
        total_chunks      INT DEFAULT 0                       total text chunks
        processed_chunks  INT DEFAULT 0                       processed text chunks
        total_synth_data  INT DEFAULT 0                       total synthesized items
        created_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP creation time
        updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                          ON UPDATE CURRENT_TIMESTAMP         last update time
        created_by        VARCHAR(255)                        creator
        updated_by        VARCHAR(255)                        last updater

    In this model both timestamp columns are declared non-nullable and are
    given ``func.now()`` as their ORM-level default (``updated_at`` also uses
    it as ``onupdate``).
    """

    __tablename__ = "t_data_synth_instances"

    # Identity and descriptive fields.
    id = Column(String(36), primary_key=True, index=True, comment="UUID")
    name = Column(String(255), nullable=False, comment="任务名称")
    description = Column(Text, nullable=True, comment="任务描述")
    status = Column(String(20), nullable=True, comment="任务状态")

    # Attribute names follow the database column names: synth_type / synth_config.
    synth_type = Column(String(20), nullable=False, comment="合成类型")
    progress = Column(Integer, nullable=False, default=0, comment="任务进度(百分比)")
    synth_config = Column(JSON, nullable=False, comment="合成配置")

    # Progress counters, all starting at zero.
    total_files = Column(Integer, nullable=False, default=0, comment="总文件数")
    processed_files = Column(Integer, nullable=False, default=0, comment="已处理文件数")
    total_chunks = Column(Integer, nullable=False, default=0, comment="总文本块数")
    processed_chunks = Column(Integer, nullable=False, default=0, comment="已处理文本块数")
    total_synth_data = Column(Integer, nullable=False, default=0, comment="总合成数据量")

    # Audit timestamps.
    created_at = Column(
        TIMESTAMP,
        nullable=False,
        default=func.now(),
        comment="创建时间",
    )
    updated_at = Column(
        TIMESTAMP,
        nullable=False,
        default=func.now(),
        onupdate=func.now(),
        comment="更新时间",
    )

    # Audit users.
    created_by = Column(String(255), nullable=True, comment="创建者")
    updated_by = Column(String(255), nullable=True, comment="更新者")
94 | 95 |
|
@@ -123,7 +124,7 @@ class DataSynthesisFileInstance(Base): |
123 | 124 | ) |
124 | 125 | file_name = Column(String(255), nullable=False, comment="文件名") |
125 | 126 | source_file_id = Column(String(255), nullable=False, comment="原始文件ID") |
126 | | - target_file_location = Column(String(1000), nullable=False, comment="目标文件存储位置") |
| 127 | + target_file_location = Column(String(1000), nullable=True, comment="目标文件存储位置") |
127 | 128 | status = Column(String(20), nullable=True, comment="任务状态") |
128 | 129 | total_chunks = Column(Integer, nullable=False, default=0, comment="总文本块数") |
129 | 130 | processed_chunks = Column(Integer, nullable=False, default=0, comment="已处理文本块数") |
|
0 commit comments