Skip to content

Commit df853a5

Browse files
authored
feat: Enhance file tag update functionality with automatic format conversion (#84)
- Updated `update_file_tags` to support both simplified and full tag formats.
- Introduced `TagFormatConverter` to handle conversion from simplified external tags to the internal storage format.
- Added logic to fetch and use the appropriate annotation template for conversion.
- Improved error handling for missing templates and unknown controls during tag updates.
- Created an example script demonstrating the usage of the new tag format conversion feature.
- Added unit tests for `TagFormatConverter` to ensure correct functionality and edge-case handling.
1 parent 5cef9cb commit df853a5

File tree

10 files changed

+1127
-54
lines changed

10 files changed

+1127
-54
lines changed

runtime/datamate-python/app/module/annotation/interface/project.py

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -150,23 +150,34 @@ async def create_mapping(
150150
async def list_mappings(
151151
page: int = Query(1, ge=1, description="页码(从1开始)"),
152152
page_size: int = Query(20, ge=1, le=100, description="每页记录数", alias="pageSize"),
153+
include_template: bool = Query(False, description="是否包含模板详情", alias="includeTemplate"),
153154
db: AsyncSession = Depends(get_db)
154155
):
155156
"""
156157
查询所有映射关系(分页)
157158
158-
返回所有有效的数据集映射关系(未被软删除的),支持分页查询
159+
返回所有有效的数据集映射关系(未被软删除的),支持分页查询。
160+
可选择是否包含完整的标注模板信息(默认不包含,以提高列表查询性能)。
161+
162+
参数:
163+
- page: 页码(从1开始)
164+
- pageSize: 每页记录数
165+
- includeTemplate: 是否包含模板详情(默认false)
159166
"""
160167
try:
161168
service = DatasetMappingService(db)
162169

163170
# 计算 skip
164171
skip = (page - 1) * page_size
165172

173+
logger.info(f"List mappings: page={page}, page_size={page_size}, include_template={include_template}")
174+
166175
# 获取数据和总数
167176
mappings, total = await service.get_all_mappings_with_count(
168-
skip=skip,
169-
limit=page_size
177+
skip=skip,
178+
limit=page_size,
179+
include_deleted=False,
180+
include_template=include_template
170181
)
171182

172183
# 计算总页数
@@ -181,7 +192,7 @@ async def list_mappings(
181192
content=mappings
182193
)
183194

184-
logger.info(f"List mappings: page={page}, returned {len(mappings)}/{total}")
195+
logger.info(f"List mappings: page={page}, returned {len(mappings)}/{total}, templates_included: {include_template}")
185196

186197
return StandardResponse(
187198
code=200,
@@ -199,22 +210,29 @@ async def get_mapping(
199210
db: AsyncSession = Depends(get_db)
200211
):
201212
"""
202-
根据 UUID 查询单个映射关系
213+
根据 UUID 查询单个映射关系(包含关联的标注模板详情)
214+
215+
返回数据集映射关系以及关联的完整标注模板信息,包括:
216+
- 映射基本信息
217+
- 数据集信息
218+
- Label Studio 项目信息
219+
- 完整的标注模板配置(如果存在)
203220
"""
204221
try:
205222
service = DatasetMappingService(db)
206223

207-
logger.info(f"Get mapping: {mapping_id}")
224+
logger.info(f"Get mapping with template details: {mapping_id}")
208225

209-
mapping = await service.get_mapping_by_uuid(mapping_id)
226+
# 获取映射,并包含完整的模板信息
227+
mapping = await service.get_mapping_by_uuid(mapping_id, include_template=True)
210228

211229
if not mapping:
212230
raise HTTPException(
213231
status_code=404,
214232
detail=f"Mapping not found: {mapping_id}"
215233
)
216234

217-
logger.info(f"Found mapping: {mapping.id}")
235+
logger.info(f"Found mapping: {mapping.id}, template_included: {mapping.template is not None}")
218236

219237
return StandardResponse(
220238
code=200,
@@ -233,26 +251,35 @@ async def get_mappings_by_source(
233251
dataset_id: str,
234252
page: int = Query(1, ge=1, description="页码(从1开始)"),
235253
page_size: int = Query(20, ge=1, le=100, description="每页记录数", alias="pageSize"),
254+
include_template: bool = Query(True, description="是否包含模板详情", alias="includeTemplate"),
236255
db: AsyncSession = Depends(get_db)
237256
):
238257
"""
239-
根据源数据集 ID 查询所有映射关系(分页)
258+
根据源数据集 ID 查询所有映射关系(分页,包含模板详情)
259+
260+
返回该数据集创建的所有标注项目(不包括已删除的),支持分页查询。
261+
默认包含关联的完整标注模板信息。
240262
241-
返回该数据集创建的所有标注项目(不包括已删除的),支持分页查询
263+
参数:
264+
- dataset_id: 数据集ID
265+
- page: 页码(从1开始)
266+
- pageSize: 每页记录数
267+
- includeTemplate: 是否包含模板详情(默认true)
242268
"""
243269
try:
244270
service = DatasetMappingService(db)
245271

246272
# 计算 skip
247273
skip = (page - 1) * page_size
248274

249-
logger.info(f"Get mappings by source dataset id: {dataset_id}, page={page}, page_size={page_size}")
275+
logger.info(f"Get mappings by source dataset id: {dataset_id}, page={page}, page_size={page_size}, include_template={include_template}")
250276

251-
# 获取数据和总数
277+
# 获取数据和总数(包含模板信息)
252278
mappings, total = await service.get_mappings_by_source_with_count(
253279
dataset_id=dataset_id,
254280
skip=skip,
255-
limit=page_size
281+
limit=page_size,
282+
include_template=include_template
256283
)
257284

258285
# 计算总页数
@@ -267,7 +294,7 @@ async def get_mappings_by_source(
267294
content=mappings
268295
)
269296

270-
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
297+
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}, templates_included: {include_template}")
271298

272299
return StandardResponse(
273300
code=200,

runtime/datamate-python/app/module/annotation/interface/task.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -244,27 +244,61 @@ async def update_file_tags(
244244
db: AsyncSession = Depends(get_db)
245245
):
246246
"""
247-
Update File Tags (Partial Update)
247+
Update File Tags (Partial Update with Auto Format Conversion)
248248
249249
接收部分标签更新并合并到指定文件(只修改提交的标签,其余保持不变),并更新 `tags_updated_at`。
250+
251+
支持两种标签格式:
252+
1. 简化格式(外部用户提交):
253+
[{"from_name": "label", "to_name": "image", "values": ["cat", "dog"]}]
254+
255+
2. 完整格式(内部存储):
256+
[{"id": "...", "from_name": "label", "to_name": "image", "type": "choices",
257+
"value": {"choices": ["cat", "dog"]}}]
258+
259+
系统会自动根据数据集关联的模板将简化格式转换为完整格式。
250260
请求与响应使用 Pydantic 模型 `UpdateFileTagsRequest` / `UpdateFileTagsResponse`。
251261
"""
252262
service = DatasetManagementService(db)
253263

264+
# 首先获取文件所属的数据集
265+
from sqlalchemy.future import select
266+
from app.db.models import DatasetFiles
267+
268+
result = await db.execute(
269+
select(DatasetFiles).where(DatasetFiles.id == file_id)
270+
)
271+
file_record = result.scalar_one_or_none()
272+
273+
if not file_record:
274+
raise HTTPException(status_code=404, detail=f"File not found: {file_id}")
275+
276+
dataset_id = str(file_record.dataset_id) # type: ignore - Convert Column to str
277+
278+
# 查找数据集关联的模板ID
279+
from ..service.mapping import DatasetMappingService
280+
281+
mapping_service = DatasetMappingService(db)
282+
template_id = await mapping_service.get_template_id_by_dataset_id(dataset_id)
283+
284+
if template_id:
285+
logger.info(f"Found template {template_id} for dataset {dataset_id}, will auto-convert tag format")
286+
else:
287+
logger.warning(f"No template found for dataset {dataset_id}, tags must be in full format")
288+
289+
# 更新标签(如果有模板ID则自动转换格式)
254290
success, error_msg, updated_at = await service.update_file_tags_partial(
255291
file_id=file_id,
256-
new_tags=request.tags
292+
new_tags=request.tags,
293+
template_id=template_id # 传递模板ID以启用自动转换
257294
)
258295

259296
if not success:
260297
if "not found" in (error_msg or "").lower():
261298
raise HTTPException(status_code=404, detail=error_msg)
262299
raise HTTPException(status_code=500, detail=error_msg or "更新标签失败")
263300

264-
# 获取更新后的完整标签列表
265-
from sqlalchemy.future import select
266-
from app.db.models import DatasetFiles
267-
301+
# 重新获取更新后的文件记录(获取完整标签列表)
268302
result = await db.execute(
269303
select(DatasetFiles).where(DatasetFiles.id == file_id)
270304
)

runtime/datamate-python/app/module/annotation/schema/__init__.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,6 @@
33
TagConfigResponse
44
)
55

6-
from .mapping import (
7-
DatasetMappingCreateRequest,
8-
DatasetMappingCreateResponse,
9-
DatasetMappingUpdateRequest,
10-
DatasetMappingResponse,
11-
DeleteDatasetResponse,
12-
)
13-
146
from .sync import (
157
SyncDatasetRequest,
168
SyncDatasetResponse,
@@ -30,6 +22,17 @@
3022
AnnotationTemplateListResponse
3123
)
3224

25+
from .mapping import (
26+
DatasetMappingCreateRequest,
27+
DatasetMappingCreateResponse,
28+
DatasetMappingUpdateRequest,
29+
DatasetMappingResponse,
30+
DeleteDatasetResponse,
31+
)
32+
33+
# Rebuild model to resolve forward references
34+
DatasetMappingResponse.model_rebuild()
35+
3336
__all__ = [
3437
"ConfigResponse",
3538
"TagConfigResponse",

runtime/datamate-python/app/module/annotation/schema/mapping.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
from pydantic import Field, BaseModel
2-
from typing import Optional
2+
from typing import Optional, TYPE_CHECKING
33
from datetime import datetime
44

55
from app.module.shared.schema import BaseResponseModel
66
from app.module.shared.schema import StandardResponse
77

8+
if TYPE_CHECKING:
9+
from .template import AnnotationTemplateResponse
10+
811

912
class DatasetMappingCreateRequest(BaseModel):
1013
"""数据集映射 创建 请求模型
@@ -42,6 +45,8 @@ class DatasetMappingResponse(BaseModel):
4245
labeling_project_id: str = Field(..., alias="labelingProjectId", description="标注项目ID")
4346
name: Optional[str] = Field(None, description="标注项目名称")
4447
description: Optional[str] = Field(None, description="标注项目描述")
48+
template_id: Optional[str] = Field(None, alias="templateId", description="关联的模板ID")
49+
template: Optional['AnnotationTemplateResponse'] = Field(None, description="关联的标注模板详情")
4550
created_at: datetime = Field(..., alias="createdAt", description="创建时间")
4651
updated_at: Optional[datetime] = Field(None, alias="updatedAt", description="更新时间")
4752
deleted_at: Optional[datetime] = Field(None, alias="deletedAt", description="删除时间")

0 commit comments

Comments
 (0)