11import os
22from fastapi import APIRouter , HTTPException
33from app .schemas .dataset import DatasetIn , DatasetOut
4- from app .services . dataset_registry import DatasetRegistry , _DATASET_REGISTRY
4+ from app .core . container import container
55from app .api .v1 .resp import ok , created
66from app .api .v1 .envelope import ApiResponse
77from app .api .v1 .errors import *
88
99
# Router for the dataset endpoints; handlers reach shared services through
# the imported `container` rather than a module-level registry alias.
router = APIRouter(tags=["datasets"])
1211
@router.get(
    "/",
    response_model=ApiResponse[list[DatasetOut]],
    operation_id="list_datasets",
    summary="返回目前所有注册的数据集列表,包含每个数据集的条目数和文件大小",
)
def list_datasets():
    """Return every dataset currently held by the registry.

    Each entry is expected to carry its sample count (num_samples) and
    file size (file_size), as produced by the registry's ``list()``.

    Returns:
        The registry listing wrapped by ``ok``.
    """
    return ok(container.dataset_registry.list())
1716
@router.post(
    "/",
    response_model=ApiResponse[DatasetOut],
    operation_id="register_dataset",
    summary="注册一个新的数据集或更新已有数据集的信息,根据路径作为唯一主键",
)
def register_dataset(payload: DatasetIn):
    """Register a new dataset, or update the record of an existing one.

    Args:
        payload: Validated dataset description; it is dumped to a
            JSON-compatible dict before being handed to the registry.

    Returns:
        The stored dataset record wrapped by ``created``.

    Raises:
        HTTPException: 400 if the registry raises for any reason.
    """
    try:
        ds = container.dataset_registry.add_or_update(payload.model_dump(mode="json"))
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs.
        raise HTTPException(400, f"Failed to register dataset: {e}") from e
    return created(ds)
2524
@router.get(
    "/{ds_id}",
    response_model=ApiResponse[DatasetOut],
    operation_id="get_dataset",
    summary="根据数据集 ID 获取数据集信息",
)
def get_dataset(ds_id: str):
    """Look up a single dataset by its ID.

    Args:
        ds_id: Identifier passed to the registry's ``get``.

    Returns:
        The dataset record wrapped by ``ok``.

    Raises:
        HTTPException: 404 if the registry returns a falsy result.
    """
    ds = container.dataset_registry.get(ds_id)
    if not ds:
        raise HTTPException(404, "Dataset not found")
    return ok(ds)
3231
@router.delete(
    "/{ds_id}",
    response_model=ApiResponse[dict],
    operation_id="delete_dataset",
    summary="根据数据集 ID 删除数据集",
)
def delete_dataset(ds_id: str):
    """Remove a dataset from the registry by its ID.

    Args:
        ds_id: Identifier of the dataset to delete.

    Returns:
        An ``ok`` response carrying a confirmation message.

    Raises:
        HTTPException: 404 if the registry has no record for ``ds_id``.
    """
    # Check existence first so an unknown ID yields 404 rather than
    # whatever ``remove`` does with a missing key.
    ds = container.dataset_registry.get(ds_id)
    if not ds:
        raise HTTPException(404, "Dataset not found")
    container.dataset_registry.remove(ds_id)
    return ok(message="Dataset deleted")
4039
4140
# Sample data for visualization.
@router.get(
    "/pandas_type_sample/{ds_id}",
    response_model=ApiResponse[str],
    operation_id="get_pandas_data",
    summary="获取指定数据集的 Pandas 类型样本数据,用于前端展示预览,可以通过start和end参数控制获取多少数据",
)
def get_pandas_data(ds_id: str, start: int = 0, end: int = 5):
    """Return a pandas-style sample of a dataset for front-end preview.

    Args:
        ds_id: Identifier of the dataset to sample.
        start: Start bound forwarded to the visualize service.
        end: End bound forwarded to the visualize service.

    Returns:
        The visualize service's result wrapped by ``ok``.

    Raises:
        HTTPException: 404 if the dataset is not registered; 500 if the
            lookup or the sampling fails for any other reason.
    """
    try:
        ds = container.dataset_registry.get(ds_id)
        if not ds:
            raise HTTPException(404, "Dataset not found")
        return ok(container.dataset_visualize_service.get_pandas_read_function(ds, start, end))
    except HTTPException:
        # Without this clause the 404 above is caught by the generic handler
        # below and reported to the client as a 500.
        raise
    except Exception as e:
        raise HTTPException(500, f"Failed to get pandas data: {e}") from e
5451
@@ -57,10 +54,10 @@ def get_pandas_data(ds_id: str, start: int = 0, end: int = 5):
5754@router .get ("/file_type_sample/{ds_id}" , operation_id = "get_file_type_data" , summary = "获取指定数据集的文件类型样本数据,用于前端展示下载,可以是图片、文本等" )
5855def get_file_type_data (ds_id : str ):
5956 try :
60- ds = _registry .get (ds_id )
57+ ds = container . dataset_registry .get (ds_id )
6158 if not ds :
6259 raise HTTPException (404 , "Dataset not found" )
63- file_path , media_type = _visualize_service .get_other_visualization_data (ds )
60+ file_path , media_type = container . dataset_visualize_service .get_other_visualization_data (ds )
6461 except Exception as e :
6562 raise HTTPException (500 , f"Failed to get file type data: { e } " )
6663
@@ -82,7 +79,7 @@ def get_dataset_preview(ds_id: str, num_lines: int = 5):
8279 预览内容的列表,每个元素是一个字典
8380 """
8481 try :
85- preview_data = _registry .preview (ds_id , num_lines )
82+ preview_data = container . dataset_registry .preview (ds_id , num_lines )
8683 return ok (preview_data )
8784 except FileNotFoundError :
8885 raise HTTPException (404 , "Dataset not found" )
@@ -100,7 +97,7 @@ def get_dataset_columns(ds_id: str):
10097 列名列表,如果不支持则返回空列表
10198 """
10299 try :
103- columns_data = _registry .get_columns (ds_id )
100+ columns_data = container . dataset_registry .get_columns (ds_id )
104101 return ok (columns_data )
105102 except FileNotFoundError :
106103 raise HTTPException (404 , "Dataset not found" )
0 commit comments