Skip to content

Commit ad9f41f

Browse files
authored
feat: Dataset pagination; camelCase support in schemas (#22)
Implement pagination for dataset mappings. Update response models to support camelCase parameters.
1 parent f9dbefd commit ad9f41f

File tree

5 files changed

+195
-38
lines changed

5 files changed

+195
-38
lines changed

runtime/label-studio-adapter/app/api/project/list.py

Lines changed: 59 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,60 @@
11
from fastapi import APIRouter, Depends, HTTPException, Query
22
from sqlalchemy.ext.asyncio import AsyncSession
33
from typing import List
4+
import math
45

56
from app.db.database import get_db
67
from app.services.dataset_mapping_service import DatasetMappingService
78
from app.schemas.dataset_mapping import DatasetMappingResponse
8-
from app.schemas import StandardResponse
9+
from app.schemas.common import StandardResponse, PaginatedData
910
from app.core.logging import get_logger
1011
from . import project_router
1112

1213
logger = get_logger(__name__)
1314

14-
@project_router.get("/mappings/list", response_model=StandardResponse[List[DatasetMappingResponse]])
15+
@project_router.get("/mappings/list", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
1516
async def list_mappings(
16-
skip: int = Query(0, ge=0, description="Number of records to skip"),
17-
limit: int = Query(100, ge=1, le=1000, description="Maximum number of records to return"),
17+
page: int = Query(1, ge=1, description="页码(从1开始)"),
18+
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
1819
db: AsyncSession = Depends(get_db)
1920
):
2021
"""
21-
查询所有映射关系
22+
查询所有映射关系(分页)
2223
23-
返回所有有效的数据集映射关系(未被软删除的)
24+
返回所有有效的数据集映射关系(未被软删除的),支持分页查询
2425
"""
2526
try:
2627
service = DatasetMappingService(db)
2728

28-
logger.info(f"Listing mappings, skip={skip}, limit={limit}")
29+
# 计算 skip
30+
skip = (page - 1) * page_size
2931

30-
mappings = await service.get_all_mappings(skip=skip, limit=limit)
32+
logger.info(f"Listing mappings, page={page}, page_size={page_size}")
3133

32-
logger.info(f"Found {len(mappings)} mappings")
34+
# 获取数据和总数
35+
mappings, total = await service.get_all_mappings_with_count(
36+
skip=skip,
37+
limit=page_size
38+
)
39+
40+
# 计算总页数
41+
total_pages = math.ceil(total / page_size) if total > 0 else 0
42+
43+
# 构造分页响应
44+
paginated_data = PaginatedData(
45+
page=page,
46+
size=page_size,
47+
total_elements=total,
48+
total_pages=total_pages,
49+
content=mappings
50+
)
51+
52+
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
3353

3454
return StandardResponse(
3555
code=200,
3656
message="success",
37-
data=mappings
57+
data=paginated_data
3858
)
3959

4060
except Exception as e:
@@ -78,29 +98,51 @@ async def get_mapping(
7898
raise HTTPException(status_code=500, detail="Internal server error")
7999

80100

81-
@project_router.get("/mappings/by-source/{source_dataset_id}", response_model=StandardResponse[List[DatasetMappingResponse]])
101+
@project_router.get("/mappings/by-source/{source_dataset_id}", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
82102
async def get_mappings_by_source(
83103
source_dataset_id: str,
104+
page: int = Query(1, ge=1, description="页码(从1开始)"),
105+
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
84106
db: AsyncSession = Depends(get_db)
85107
):
86108
"""
87-
根据源数据集 ID 查询所有映射关系
109+
根据源数据集 ID 查询所有映射关系(分页)
88110
89-
返回该数据集创建的所有标注项目(包括已删除的)
111+
返回该数据集创建的所有标注项目(不包括已删除的),支持分页查询
90112
"""
91113
try:
92114
service = DatasetMappingService(db)
93115

94-
logger.info(f"Get mappings by source dataset id: {source_dataset_id}")
116+
# 计算 skip
117+
skip = (page - 1) * page_size
95118

96-
mappings = await service.get_mappings_by_source_dataset_id(source_dataset_id)
119+
logger.info(f"Get mappings by source dataset id: {source_dataset_id}, page={page}, page_size={page_size}")
120+
121+
# 获取数据和总数
122+
mappings, total = await service.get_mappings_by_source_with_count(
123+
source_dataset_id=source_dataset_id,
124+
skip=skip,
125+
limit=page_size
126+
)
127+
128+
# 计算总页数
129+
total_pages = math.ceil(total / page_size) if total > 0 else 0
130+
131+
# 构造分页响应
132+
paginated_data = PaginatedData(
133+
page=page,
134+
size=page_size,
135+
total_elements=total,
136+
total_pages=total_pages,
137+
content=mappings
138+
)
97139

98-
logger.info(f"Found {len(mappings)} mappings")
140+
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
99141

100142
return StandardResponse(
101143
code=200,
102144
message="success",
103-
data=mappings
145+
data=paginated_data
104146
)
105147

106148
except HTTPException:

runtime/label-studio-adapter/app/schemas/common.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,27 @@
11
"""
22
通用响应模型
33
"""
4-
from typing import Generic, TypeVar, Optional
4+
from typing import Generic, TypeVar, Optional, List
55
from pydantic import BaseModel, Field
66

77
# 定义泛型类型变量
88
T = TypeVar('T')
99

10-
class StandardResponse(BaseModel, Generic[T]):
10+
# 定义一个将 snake_case 转换为 camelCase 的函数
11+
def to_camel(string: str) -> str:
12+
"""将 snake_case 字符串转换为 camelCase"""
13+
components = string.split('_')
14+
# 首字母小写,其余单词首字母大写
15+
return components[0] + ''.join(x.title() for x in components[1:])
16+
17+
class BaseResponseModel(BaseModel):
18+
"""基础响应模型,启用别名生成器"""
19+
20+
class Config:
21+
populate_by_name = True
22+
alias_generator = to_camel
23+
24+
class StandardResponse(BaseResponseModel, Generic[T]):
1125
"""
1226
标准API响应格式
1327
@@ -18,10 +32,32 @@ class StandardResponse(BaseModel, Generic[T]):
1832
data: Optional[T] = Field(None, description="响应数据")
1933

2034
class Config:
35+
populate_by_name = True
36+
alias_generator = to_camel
2137
json_schema_extra = {
2238
"example": {
2339
"code": 200,
2440
"message": "success",
2541
"data": {}
2642
}
2743
}
44+
45+
46+
class PaginatedData(BaseResponseModel, Generic[T]):
47+
"""分页数据容器"""
48+
page: int = Field(..., description="当前页码(从1开始)")
49+
size: int = Field(..., description="页大小")
50+
total_elements: int = Field(..., description="总条数")
51+
total_pages: int = Field(..., description="总页数")
52+
content: List[T] = Field(..., description="当前页数据")
53+
54+
class Config:
55+
json_schema_extra = {
56+
"example": {
57+
"page": 1,
58+
"size": 20,
59+
"totalElements": 100,
60+
"totalPages": 5,
61+
"content": []
62+
}
63+
}

runtime/label-studio-adapter/app/schemas/dataset_mapping.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,25 @@
1-
from pydantic import BaseModel, Field
1+
from pydantic import Field
22
from typing import Optional
33
from datetime import datetime
44

5-
class DatasetMappingBase(BaseModel):
5+
from .common import BaseResponseModel
6+
7+
class DatasetMappingBase(BaseResponseModel):
68
"""数据集映射 基础模型"""
79
source_dataset_id: str = Field(..., description="源数据集ID")
810

911
class DatasetMappingCreateRequest(DatasetMappingBase):
1012
"""数据集映射 创建 请求模型"""
1113
pass
1214

13-
class DatasetMappingCreateResponse(BaseModel):
15+
class DatasetMappingCreateResponse(BaseResponseModel):
1416
"""数据集映射 创建 响应模型"""
1517
mapping_id: str = Field(..., description="映射UUID")
1618
labelling_project_id: str = Field(..., description="Label Studio项目ID")
1719
labelling_project_name: str = Field(..., description="Label Studio项目名称")
1820
message: str = Field(..., description="响应消息")
1921

20-
class DatasetMappingUpdateRequest(BaseModel):
22+
class DatasetMappingUpdateRequest(BaseResponseModel):
2123
"""数据集映射 更新 请求模型"""
2224
source_dataset_id: Optional[str] = Field(None, description="源数据集ID")
2325

@@ -32,21 +34,22 @@ class DatasetMappingResponse(DatasetMappingBase):
3234

3335
class Config:
3436
from_attributes = True
37+
populate_by_name = True
3538

36-
class SyncDatasetRequest(BaseModel):
39+
class SyncDatasetRequest(BaseResponseModel):
3740
"""同步数据集请求模型"""
3841
mapping_id: str = Field(..., description="映射ID(mapping UUID)")
3942
batch_size: int = Field(50, ge=1, le=100, description="批处理大小")
4043

41-
class SyncDatasetResponse(BaseModel):
44+
class SyncDatasetResponse(BaseResponseModel):
4245
"""同步数据集响应模型"""
4346
mapping_id: str = Field(..., description="映射UUID")
4447
status: str = Field(..., description="同步状态")
4548
synced_files: int = Field(..., description="已同步文件数量")
4649
total_files: int = Field(0, description="总文件数量")
4750
message: str = Field(..., description="响应消息")
4851

49-
class DeleteDatasetResponse(BaseModel):
52+
class DeleteDatasetResponse(BaseResponseModel):
5053
"""删除数据集响应模型"""
5154
mapping_id: str = Field(..., description="映射UUID")
5255
status: str = Field(..., description="删除状态")
Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
from pydantic import BaseModel, Field
1+
from pydantic import Field
22
from typing import Dict, Any, Optional, List
33
from datetime import datetime
4+
from .common import BaseResponseModel
45

5-
class LabelStudioProject(BaseModel):
6+
class LabelStudioProject(BaseResponseModel):
67
"""Label Studio项目模型"""
78
id: int = Field(..., description="项目ID")
89
title: str = Field(..., description="项目标题")
@@ -11,27 +12,27 @@ class LabelStudioProject(BaseModel):
1112
created_at: Optional[datetime] = Field(None, description="创建时间")
1213
updated_at: Optional[datetime] = Field(None, description="更新时间")
1314

14-
class LabelStudioTaskData(BaseModel):
15+
class LabelStudioTaskData(BaseResponseModel):
1516
"""Label Studio任务数据模型"""
1617
image: Optional[str] = Field(None, description="图像URL")
1718
text: Optional[str] = Field(None, description="文本内容")
1819
audio: Optional[str] = Field(None, description="音频URL")
1920
video: Optional[str] = Field(None, description="视频URL")
2021
filename: Optional[str] = Field(None, description="文件名")
2122

22-
class LabelStudioTask(BaseModel):
23+
class LabelStudioTask(BaseResponseModel):
2324
"""Label Studio任务模型"""
2425
data: LabelStudioTaskData = Field(..., description="任务数据")
2526
project: Optional[int] = Field(None, description="项目ID")
2627
meta: Optional[Dict[str, Any]] = Field(None, description="元数据")
2728

28-
class LabelStudioCreateProjectRequest(BaseModel):
29+
class LabelStudioCreateProjectRequest(BaseResponseModel):
2930
"""创建Label Studio项目请求模型"""
3031
title: str = Field(..., description="项目标题")
3132
description: str = Field("", description="项目描述")
3233
label_config: str = Field(..., description="标注配置")
3334

34-
class LabelStudioCreateTaskRequest(BaseModel):
35+
class LabelStudioCreateTaskRequest(BaseResponseModel):
3536
"""创建Label Studio任务请求模型"""
3637
data: Dict[str, Any] = Field(..., description="任务数据")
3738
project: Optional[int] = Field(None, description="项目ID")

runtime/label-studio-adapter/app/services/dataset_mapping_service.py

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from sqlalchemy.ext.asyncio import AsyncSession
22
from sqlalchemy.future import select
3-
from sqlalchemy import update
4-
from typing import Optional, List
3+
from sqlalchemy import update, func
4+
from typing import Optional, List, Tuple
55
from datetime import datetime
66
import uuid
77

@@ -213,11 +213,86 @@ async def get_all_mappings(
213213
logger.debug(f"Found {len(mappings)} mappings")
214214
return [DatasetMappingResponse.model_validate(mapping) for mapping in mappings]
215215

216-
async def count_mappings(self) -> int:
216+
async def count_mappings(self, include_deleted: bool = False) -> int:
217217
"""统计映射总数"""
218+
query = select(func.count()).select_from(DatasetMapping)
219+
220+
if not include_deleted:
221+
query = query.where(DatasetMapping.deleted_at.is_(None))
222+
223+
result = await self.db.execute(query)
224+
return result.scalar_one()
225+
226+
async def get_all_mappings_with_count(
227+
self,
228+
skip: int = 0,
229+
limit: int = 100,
230+
include_deleted: bool = False
231+
) -> Tuple[List[DatasetMappingResponse], int]:
232+
"""获取所有映射及总数(用于分页)"""
233+
logger.debug(f"List all mappings with count, skip: {skip}, limit: {limit}")
234+
235+
# 构建查询
236+
query = select(DatasetMapping)
237+
if not include_deleted:
238+
query = query.where(DatasetMapping.deleted_at.is_(None))
239+
240+
# 获取总数
241+
count_query = select(func.count()).select_from(DatasetMapping)
242+
if not include_deleted:
243+
count_query = count_query.where(DatasetMapping.deleted_at.is_(None))
244+
245+
count_result = await self.db.execute(count_query)
246+
total = count_result.scalar_one()
247+
248+
# 获取数据
218249
result = await self.db.execute(
219-
select(DatasetMapping)
220-
.where(DatasetMapping.deleted_at.is_(None))
250+
query
251+
.offset(skip)
252+
.limit(limit)
253+
.order_by(DatasetMapping.created_at.desc())
254+
)
255+
mappings = result.scalars().all()
256+
257+
logger.debug(f"Found {len(mappings)} mappings, total: {total}")
258+
return [DatasetMappingResponse.model_validate(mapping) for mapping in mappings], total
259+
260+
async def get_mappings_by_source_with_count(
261+
self,
262+
source_dataset_id: str,
263+
skip: int = 0,
264+
limit: int = 100,
265+
include_deleted: bool = False
266+
) -> Tuple[List[DatasetMappingResponse], int]:
267+
"""根据源数据集ID获取映射关系及总数(用于分页)"""
268+
logger.debug(f"Get mappings by source dataset id with count: {source_dataset_id}")
269+
270+
# 构建查询
271+
query = select(DatasetMapping).where(
272+
DatasetMapping.source_dataset_id == source_dataset_id
273+
)
274+
275+
if not include_deleted:
276+
query = query.where(DatasetMapping.deleted_at.is_(None))
277+
278+
# 获取总数
279+
count_query = select(func.count()).select_from(DatasetMapping).where(
280+
DatasetMapping.source_dataset_id == source_dataset_id
281+
)
282+
if not include_deleted:
283+
count_query = count_query.where(DatasetMapping.deleted_at.is_(None))
284+
285+
count_result = await self.db.execute(count_query)
286+
total = count_result.scalar_one()
287+
288+
# 获取数据
289+
result = await self.db.execute(
290+
query
291+
.offset(skip)
292+
.limit(limit)
293+
.order_by(DatasetMapping.created_at.desc())
221294
)
222295
mappings = result.scalars().all()
223-
return len(mappings)
296+
297+
logger.debug(f"Found {len(mappings)} mappings, total: {total}")
298+
return [DatasetMappingResponse.model_validate(mapping) for mapping in mappings], total

0 commit comments

Comments
 (0)